I am trying to implement a projection-mapping project in Processing using a custom projection matrix. I found an example that might give me a clue, but it is quite old, and both OpenGL and Processing have changed a lot since then. I am not very familiar with shaders and OpenGL, but so far I have managed to update the old code to the version shown below, so you can also compare it with the original.
I am still getting a:
GLException: not a gl2 implementation
I am also a bit confused about using PGL, GL, and GL2 at the same time; I feel this is not good practice.
The original version of the code, from the Processing 1.0 forum, is here.
This is the code I tried to update so far:
import com.jogamp.opengl.*;
import java.nio.FloatBuffer;
float[] modelview = {
   0.670984f,  0.250691f,   0.674993f,  0f,
  -0.288247f,  0.966749f,  -0.137371f,  0f,
  -0.68315f,  -0.0505059f,  0.720934f,  0f,
   0.164808f,  2.1425f,    32.9616f,    1f };
float[] proj = {
  0.78125f,  0f,        0f,       0f,
  0f,        1.04167f,  0f,       0f,
  0f,        0f,       -1.0002f, -1f,
  0f,        0f,       -2.0002f,  0f };
FloatBuffer mvbuf;
FloatBuffer projbuf;
void setup() {
size(1024, 768, P3D);
PJOGL.profile = 2; //not sure if needed
mvbuf = FloatBuffer.wrap(modelview);
projbuf= FloatBuffer.wrap(proj);
GLProfile glp = GLProfile.get(GLProfile.GL2);
GLCapabilitiesImmutable glcaps = (GLCapabilitiesImmutable) new GLCapabilities(glp);
GLCapabilities tGLCapabilities = new GLCapabilities(glp);
println("System Capabilities:" + glcaps.toString());
println("Profile Details: " + glp.toString());
println("Is GL2 Supported?: " + glp.isGL2());
}
void draw() {
background(0);
PGL pgl = (PJOGL) beginPGL();
GL gl = ((PJOGL) pgl).gl;
GL2 gl2 = gl.getGL2(); //GLException: not a GL2 implementation
gl2.glMatrixMode(GL2.GL_PROJECTION);
gl2.glLoadIdentity();
gl2.glLoadMatrixf(projbuf);
gl2.glMatrixMode(GL2.GL_MODELVIEW);
gl2.glLoadIdentity();
gl2.glLoadMatrixf(mvbuf);
drawGrid(100, 10, gl2);
endPGL(); //not sure if this is closing what it's supposed to
}
void drawGrid(float len, float offset, GL2 g) {
int nr_lines = (int)(len/offset);
g.glColor3f(1, 1, 1);
g.glBegin(g.GL_LINES);
for (int i=-nr_lines; i<nr_lines; i++) {
g.glVertex3f(i*offset, 0, -nr_lines*offset);
g.glVertex3f(i*offset, 0, nr_lines*offset);
}
for (int i=-nr_lines; i<nr_lines; i++) {
g.glVertex3f(-nr_lines*offset, 0, i*offset);
g.glVertex3f(nr_lines*offset, 0, i*offset);
}
g.glEnd();
}
First try this:
PGraphicsOpenGL pg = (PGraphicsOpenGL)g;
println(pg.OPENGL_VERSION);
What does it output? For me this would output:
4.5.0 NVIDIA 376.51
Therefore calling gl.getGL2() fails, because an OpenGL 4.5 core context is not backwards compatible with an OpenGL 2.x context.
If I recall correctly, you have to set PJOGL.profile to 1 to get a backwards-compatible context:
PJOGL.profile = 1;
Note that if you're using Processing 3.0, you might have to set it in settings():
void settings() {
size(1024, 768, P3D);
PJOGL.profile = 1;
}
I am trying to develop an ML-powered plugin for real-time image-processing software that provides image data as a cudaArray_t on the GPU. Because the software locks me into an older CUDA version, I would like to do this with DirectML (the software is Windows-only anyway).
For latency reasons, I don't want any unnecessary GPU-CPU-GPU round trips. To avoid them, I thought I would need to map the CUDA data to D3D12 resources, which can then be used to create input and output tensors to bind to the model. I have found a sample that uses the CUDA External Resource Interoperability API to map a cudaArray_t into an ID3D12Resource here, which I am trying to base my code on. As I don't need to render anything, I thought I could simply create the heap and resource and then copy the incoming cudaArray_t into the interop cudaArray_t as shown below, without needing to create any sort of command queue. Note that the missing code is the same as in the linked GitHub repo above, so I left it out for conciseness.
This approach does not work, but I am not sure how to debug it, as I am new to Direct3D programming and GPU programming in general. I am using the official Direct3D 12 docs as a reference, but they are a bit overwhelming, so some direction on what should be fixed here would be greatly appreciated :) I was thinking that I need to use a semaphore for some kind of syncing, but I am not sure whether that works without creating some sort of command queue.
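For reference, this is the direction I had in mind for the syncing part, based on the CUDA external-semaphore API. It is only an untested sketch: it assumes a shared ID3D12Fence and a few placeholder members (m_fence, m_fenceValue, m_extSem) that are not part of the linked sample.
void initSharedFence() {
    // Create a shareable D3D12 fence and import it into CUDA as an external semaphore.
    winrt::check_hresult(m_d3d12Device->CreateFence(
        0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(m_fence.put())));

    HANDLE fenceHandle = nullptr;
    winrt::check_hresult(m_d3d12Device->CreateSharedHandle(
        m_fence.get(), nullptr, GENERIC_ALL, nullptr, &fenceHandle));

    cudaExternalSemaphoreHandleDesc semDesc{};
    semDesc.type = cudaExternalSemaphoreHandleTypeD3D12Fence;
    semDesc.handle.win32.handle = fenceHandle;
    checkCudaErrors(cudaImportExternalSemaphore(&m_extSem, &semDesc));
}
void signalAfterCudaCopy() {
    // Signal the fence from the CUDA stream after the copy; the D3D12 queue that
    // runs the DirectML work can then Wait() on m_fence at the same value before
    // consuming the texture.
    cudaExternalSemaphoreSignalParams sigParams{};
    sigParams.params.fence.value = ++m_fenceValue;
    checkCudaErrors(cudaSignalExternalSemaphoresAsync(
        &m_extSem, &sigParams, 1, m_streamToRun));
}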
bool initD3d12() {
// setup the d3d12 device
UINT dxgiFactoryFlags = 0;
winrt::com_ptr<IDXGIFactory4> factory;
winrt::check_hresult(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(factory.put())));
winrt::com_ptr<IDXGIAdapter1> hardwareAdapter;
GetHardwareAdapter(factory.get(), hardwareAdapter.put());
winrt::check_hresult(D3D12CreateDevice(hardwareAdapter.get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(m_d3d12Device.put())));
DXGI_ADAPTER_DESC1 desc;
hardwareAdapter->GetDesc1(&desc);
m_dx12deviceluid = desc.AdapterLuid;
return true;
}
void initCuda() {
// setup the cuda device
int num_cuda_devices = 0;
checkCudaErrors(cudaGetDeviceCount(&num_cuda_devices));
if (!num_cuda_devices) {
throw std::exception("No CUDA Devices found");
}
for (int devId = 0; devId < num_cuda_devices; devId++) {
cudaDeviceProp devProp;
checkCudaErrors(cudaGetDeviceProperties(&devProp, devId));
if ((memcmp(&m_dx12deviceluid.LowPart, devProp.luid,
sizeof(m_dx12deviceluid.LowPart)) == 0) &&
(memcmp(&m_dx12deviceluid.HighPart,
devProp.luid + sizeof(m_dx12deviceluid.LowPart),
sizeof(m_dx12deviceluid.HighPart)) == 0)) {
checkCudaErrors(cudaSetDevice(devId));
m_cudaDeviceID = devId;
m_nodeMask = devProp.luidDeviceNodeMask;
checkCudaErrors(cudaStreamCreate(&m_streamToRun));
printf("CUDA Device Used [%d] %s\n", devId, devProp.name);
break;
}
}
}
void copyArrayToResource(cudaArray_t cudaArray) {
// Copy the incoming cudaArray into the D3D12 texture via its CUDA-mapped alias m_cudaArray
cudaMemcpy2DArrayToArray(
m_cudaArray, // dst array (mapped from the D3D12 resource)
0, 0, // dst offset
cudaArray, 0, 0, // src array and offset
m_width * 4 * sizeof(float), m_height, // width in bytes, height in rows
cudaMemcpyDeviceToDevice); // kind
}
void createResource(size_t width, size_t height, ID3D12Resource** d3d12Resource) {
// Create a d3d12 resource in the desired size and map it to a cudaArray
m_width = width;
m_height = height;
// Create D3D12 2DTexture
// Assume 32-Bit float RGBA image
const auto channels = 4;
const auto textureSurface = width * height;
const auto texturePixels = textureSurface * channels;
const auto textureSizeBytes = sizeof(float)* texturePixels;
const auto texFormat = channels == 4 ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
const auto texDesc = CD3DX12_RESOURCE_DESC::Tex2D(texFormat, width, height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS);
D3D12_HEAP_PROPERTIES heapProperties = {
D3D12_HEAP_TYPE_DEFAULT,
D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
D3D12_MEMORY_POOL_UNKNOWN,
0,
0};
winrt::check_hresult(m_d3d12Device->CreateCommittedResource(
&heapProperties,
D3D12_HEAP_FLAG_SHARED,
&texDesc,
D3D12_RESOURCE_STATE_COMMON,
nullptr,
IID_PPV_ARGS(d3d12Resource)));
// Create CUDA external resource
HANDLE sharedHandle;
WindowsSecurityAttributes windowsSecurityAttributes{};
LPCWSTR name = NULL;
winrt::check_hresult(m_d3d12Device->CreateSharedHandle(
*d3d12Resource, &windowsSecurityAttributes, GENERIC_ALL, 0,
&sharedHandle));
D3D12_RESOURCE_ALLOCATION_INFO d3d12ResourceAllocationInfo;
d3d12ResourceAllocationInfo = m_d3d12Device->GetResourceAllocationInfo(
m_nodeMask, 1, &texDesc);
size_t actualSize = d3d12ResourceAllocationInfo.SizeInBytes;
size_t alignment = d3d12ResourceAllocationInfo.Alignment;
cudaExternalMemoryHandleDesc externalMemoryHandleDesc;
memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc));
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource;
externalMemoryHandleDesc.handle.win32.handle = sharedHandle;
externalMemoryHandleDesc.size = actualSize;
externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated;
checkCudaErrors(
cudaImportExternalMemory(&m_externalMemory, &externalMemoryHandleDesc));
cudaExternalMemoryMipmappedArrayDesc cuExtmemMipDesc{};
cuExtmemMipDesc.extent = make_cudaExtent(width, height, 0);
cuExtmemMipDesc.formatDesc = cudaCreateChannelDesc<float4>();
cuExtmemMipDesc.numLevels = 1;
cuExtmemMipDesc.flags = cudaArrayDefault;
cudaMipmappedArray_t cuMipArray{};
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cuMipArray, m_externalMemory, &cuExtmemMipDesc));
checkCudaErrors(cudaGetMipmappedArrayLevel(&m_cudaArray, cuMipArray, 0));
}
In the end, if the mapping to an ID3D12Resource works, I assume one could use the ITensorStaticsNative interface to create a tensor to bind to the input or output of a LearningModel.
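For completeness, this is roughly how I expect that last step to look. It is a minimal sketch based on the ITensorStaticsNative documentation; the helper name and the NCHW shape for a 4-channel float image are my own assumptions.
#include <winrt/Windows.AI.MachineLearning.h>
#include <windows.ai.machinelearning.native.h> // ITensorStaticsNative

using winrt::Windows::AI::MachineLearning::TensorFloat;

// Wrap an existing ID3D12Resource in a TensorFloat that can be bound to a
// LearningModel input or output.
TensorFloat tensorFromResource(ID3D12Resource* resource, int64_t width, int64_t height)
{
    auto factory = winrt::get_activation_factory<TensorFloat, ITensorStaticsNative>();

    int64_t shape[4] = { 1, 4, height, width }; // assumed NCHW layout
    winrt::com_ptr<::IUnknown> unknown;
    winrt::check_hresult(factory->CreateFromD3D12Resource(resource, shape, 4, unknown.put()));

    return unknown.as<TensorFloat>();
}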
I'm facing some strange difficulties with an OpenGL buffer. I tried to shrink the problem down to a minimal example, so I created a program that increments each number in a FloatBuffer on every iteration. When I put fewer than 2^16 floats into the FloatBuffer, everything works just fine, but with >= 2^16 numbers the values are not incremented and stay the same on every iteration.
Renderer:
public class Renderer extends AbstractRenderer {
int computeShaderProgram;
int[] locBuffer = new int[2];
FloatBuffer data;
int numbersCount = 65_536, round = 0; // 65_535 - OK, 65_536 - wrong
@Override
public void init() {
computeShaderProgram = ShaderUtils.loadProgram(null, null, null, null, null,
"/main/computeBuffer");
glGenBuffers(locBuffer);
// dataSizeInBytes = count of numbers to sort * (float=4B + padding=3*4B)
int dataSizeInBytes = numbersCount * (1 + 3) * 4;
data = ByteBuffer.allocateDirect(dataSizeInBytes)
.order(ByteOrder.nativeOrder())
.asFloatBuffer();
initBuffer();
printBuffer(data);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, locBuffer[0]);
glBufferData(GL_SHADER_STORAGE_BUFFER, data, GL_DYNAMIC_DRAW);
glShaderStorageBlockBinding(computeShaderProgram, 0, 0);
glViewport(0, 0, width, height);
}
private void initBuffer() {
data.rewind();
Random r = new Random();
for (int i = 0; i < numbersCount; i++) {
data.put(i*4, r.nextFloat());
}
}
@Override
public void display() {
if (round < 5) {
glUseProgram(computeShaderProgram);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, locBuffer[0]);
glDispatchCompute(numbersCount, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, data);
printBuffer(data);
round++;
}
}
...
}
Compute buffer
#version 430
#extension GL_ARB_compute_shader : enable
#extension GL_ARB_shader_storage_buffer_object : enable
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0) buffer Input {
float elements[];
} input_data;
void main() {
input_data.elements[gl_WorkGroupID.x] = input_data.elements[gl_WorkGroupID.x] + 1;
}
glDispatchCompute(numbersCount, 1, 1);
You must not dispatch a compute shader with a workgroup count exceeding the corresponding GL_MAX_COMPUTE_WORK_GROUP_COUNT for each dimension. The spec only guarantees that limit to be at least 65535, so it is very likely that you are simply exceeding the limit of your implementation. Actually, you should be getting a GL_INVALID_VALUE error for that call, and you should really consider using a debug context and a debug message callback so that such obvious errors are easily spotted during development.
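For example, querying the limit and enabling debug output looks roughly like this. It is written against the C API (the same calls are exposed by the Java bindings, e.g. LWJGL's GL43), and myDebugCallback is a placeholder for your own GLDEBUGPROC:
// Query the per-dimension dispatch limit (guaranteed to be at least 65535).
GLint maxX, maxY, maxZ;
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxX);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &maxY);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &maxZ);
printf("max compute work group count: %d x %d x %d\n", maxX, maxY, maxZ);

// With a debug context, an oversized glDispatchCompute reports GL_INVALID_VALUE
// through the callback instead of failing silently.
glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(myDebugCallback, NULL);
A common way to stay under the limit is to use a larger local_size_x (e.g. 256) and dispatch numbersCount / local_size_x workgroups instead of one workgroup per element.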
I've just started using DirectCompute in an attempt to move a fluid simulation I have been working on onto the GPU. I found a very similar (if not identical) question here; however, the resolution to their problem does not seem to apply to mine; I definitely have my CopyResource the right way round! As with the linked question, I only get a buffer filled with 0's when copying back from the GPU. I really can't see the error, as I don't understand how I could be reaching any out-of-bounds limits. I apologise for the mass of code pasted below, but I want to be sure I haven't got any of the setup wrong.
Output Buffer, UAV and System Buffer set up
outputDesc.Usage = D3D11_USAGE_DEFAULT;
outputDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
outputDesc.ByteWidth = sizeof(BoundaryConditions) * numElements;
outputDesc.CPUAccessFlags = 0;
outputDesc.StructureByteStride = sizeof(BoundaryConditions);
outputDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
result =_device->CreateBuffer(&outputDesc, 0, &m_outputBuffer);
outputDesc.Usage = D3D11_USAGE_STAGING;
outputDesc.BindFlags = 0;
outputDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
result = _device->CreateBuffer(&outputDesc, 0, &m_outputresult);
D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc;
uavDesc.Format = DXGI_FORMAT_UNKNOWN;
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
uavDesc.Buffer.FirstElement = 0;
uavDesc.Buffer.Flags = 0;
uavDesc.Buffer.NumElements = numElements;
result =_device->CreateUnorderedAccessView(m_outputBuffer, &uavDesc, &m_BoundaryConditionsUAV);
Running the Shader in my frame loop
HRESULT result;
D3D11_MAPPED_SUBRESOURCE mappedResource;
_deviceContext->CSSetShader(m_BoundaryConditionsCS, nullptr, 0);
_deviceContext->CSSetUnorderedAccessViews(0, 1, &m_BoundaryConditionsUAV, 0);
_deviceContext->Dispatch(1, 1, 1);
// Unbind output from compute shader
ID3D11UnorderedAccessView* nullUAV[] = { NULL };
_deviceContext->CSSetUnorderedAccessViews(0, 1, nullUAV, 0);
// Disable Compute Shader
_deviceContext->CSSetShader(nullptr, nullptr, 0);
_deviceContext->CopyResource(m_outputresult, m_outputBuffer);
D3D11_MAPPED_SUBRESOURCE mappedData;
result = _deviceContext->Map(m_outputresult, 0, D3D11_MAP_READ, 0, &mappedData);
BoundaryConditions* newbc = reinterpret_cast<BoundaryConditions*>(mappedData.pData);
for (int i = 0; i < 4; i++)
{
Debug::Instance()->Log(newbc[i].x.x);
}
_deviceContext->Unmap(m_outputresult, 0);
HLSL
struct BoundaryConditions
{
float3 x;
float3 y;
};
RWStructuredBuffer<BoundaryConditions> _boundaryConditions;
[numthreads(4, 1, 1)]
void ComputeBoundaryConditions(int3 id : SV_DispatchThreadID)
{
_boundaryConditions[id.x].x = float3(id.x,id.y,id.z);
}
I dispatch the compute shader after I begin a frame and before I end it. I have played around with moving the shader's dispatch call outside of the end scene and before the present, etc., but nothing seems to affect the result. Can't seem to figure this one out!
Holy smokes, I fixed the error! I was creating the compute shader into a different ID3D11ComputeShader pointer! Works like a charm now. Phew, sorry, and thanks Adam!
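For anyone else hitting this: the fix is just making sure the pointer the shader is created into is the same one that later gets bound. A minimal sketch, assuming a compiled shader blob csBlob and the member names from the question:
// Create the compute shader into the SAME member that is bound later.
HRESULT hr = _device->CreateComputeShader(
    csBlob->GetBufferPointer(), csBlob->GetBufferSize(),
    nullptr, &m_BoundaryConditionsCS);
if (FAILED(hr))
{
    // Handle the error; an unchecked failure here leaves the pointer null and
    // the later Dispatch silently does nothing useful.
}

// ... later, in the frame loop, bind that exact pointer:
_deviceContext->CSSetShader(m_BoundaryConditionsCS, nullptr, 0);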
I am tinkering with DirectX 12 and have hit a wall while trying to draw a "checkerboard." I have searched the net quite a bit, so any help/pointers will be appreciated.
In D3D11 the working code is as follows.
auto context = m_deviceResources->GetD3DDeviceContext();
for (int i = -10; i < 10; i++)
{
for (int j = -10; j < 10; j++)
{
// perform translation
XMStoreFloat4x4(&m_constantBufferData.model, XMMatrixTranspose(XMMatrixTranslation(i, j, 0.0f)));
context->UpdateSubresource(
m_constantBuffer.Get(),
0,
NULL,
&m_constantBufferData,
0,
0
);
// shaders, etc...
// draw the square
context->DrawIndexed(
m_indexCount,
0,
0
);
}
}
In D3D12, I have tried doing the same thing, but the translation appears to be applied globally, as all squares end up in the same location.
bool Sample3DSceneRenderer::Render()
{
if (!m_loadingComplete)
{
return false;
}
DX::ThrowIfFailed(m_deviceResources->GetCommandAllocator()->Reset());
DX::ThrowIfFailed(m_commandList->Reset(m_deviceResources->GetCommandAllocator(), m_pipelineState.Get()));
PIXBeginEvent(m_commandList.Get(), 0, L"Draw the objects");
{
m_commandList->SetGraphicsRootSignature(m_rootSignature.Get());
ID3D12DescriptorHeap* ppHeaps[] = { m_cbvHeap.Get() };
m_commandList->SetDescriptorHeaps(_countof(ppHeaps), ppHeaps);
CD3DX12_GPU_DESCRIPTOR_HANDLE gpuHandle(m_cbvHeap->GetGPUDescriptorHandleForHeapStart(), m_deviceResources->GetCurrentFrameIndex(), m_cbvDescriptorSize);
m_commandList->SetGraphicsRootDescriptorTable(0, gpuHandle);
D3D12_VIEWPORT viewport = m_deviceResources->GetScreenViewport();
m_commandList->RSSetViewports(1, &viewport);
m_commandList->RSSetScissorRects(1, &m_scissorRect);
CD3DX12_RESOURCE_BARRIER renderTargetResourceBarrier =
CD3DX12_RESOURCE_BARRIER::Transition(m_deviceResources->GetRenderTarget(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET);
m_commandList->ResourceBarrier(1, &renderTargetResourceBarrier);
D3D12_CPU_DESCRIPTOR_HANDLE renderTargetView = m_deviceResources->GetRenderTargetView();
D3D12_CPU_DESCRIPTOR_HANDLE depthStencilView = m_deviceResources->GetDepthStencilView();
m_commandList->ClearRenderTargetView(renderTargetView, m_colors.Get_background(), 0, nullptr);
m_commandList->ClearDepthStencilView(depthStencilView, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr);
m_commandList->OMSetRenderTargets(1, &renderTargetView, false, &depthStencilView);
m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
m_commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView);
m_commandList->IASetIndexBuffer(&m_indexBufferView);
for (float i = -10.0f; i < 10.0f; i++)
{
for (float j = -10.0f; j < 10.0f; j++)
{
// as far as I know, this is how I should perform the translation
XMStoreFloat4x4(&m_constantBufferData.model, XMMatrixTranspose(XMMatrixTranslation(i, j, 0.0f)));
UINT8* destination = m_mappedConstantBuffer + (m_deviceResources->GetCurrentFrameIndex() * c_alignedConstantBufferSize);
memcpy(destination, &m_constantBufferData, sizeof(m_constantBufferData));
m_commandList->DrawIndexedInstanced(6, 1, 0, 0, 0);
}
}
CD3DX12_RESOURCE_BARRIER presentResourceBarrier =
CD3DX12_RESOURCE_BARRIER::Transition(m_deviceResources->GetRenderTarget(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT);
m_commandList->ResourceBarrier(1, &presentResourceBarrier);
}
PIXEndEvent(m_commandList.Get());
DX::ThrowIfFailed(m_commandList->Close());
ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() };
m_deviceResources->GetCommandQueue()->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
return true;
}
Thank you,
Chelsey
You're just writing your translation matrix to the same piece of memory for every copy of the model. Since the GPU hasn't even begun drawing the first model by the time you've finished writing the translation matrix for the last one, the only place any of these models are going to draw is at the place of the last translation matrix written.
You need to write each matrix to a separate, distinct location in memory and ensure they're not overwritten by anything else until the GPU has finished drawing the models.
The act of calling DrawIndexedInstanced does not immediately instruct the GPU to draw anything, it merely adds a command to a command list to draw the object at some time in the future. If you're not familiar with the asynchronous nature of Graphics APIs and GPU execution you should probably read up a bit more on how it works.
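For illustration, here is a minimal sketch of one way to do that in the loop above. It assumes the upload heap behind m_mappedConstantBuffer (resource m_constantBuffer) is big enough for one 256-byte-aligned slice per draw per frame, and that root parameter 0 is a root CBV rather than the descriptor table used in the original code; treat it as a sketch of the idea, not a drop-in replacement.
const UINT drawsPerFrame = 20 * 20; // 20 x 20 grid
UINT drawIndex = 0;
for (float i = -10.0f; i < 10.0f; i++)
{
    for (float j = -10.0f; j < 10.0f; j++, drawIndex++)
    {
        XMStoreFloat4x4(&m_constantBufferData.model,
            XMMatrixTranspose(XMMatrixTranslation(i, j, 0.0f)));

        // Every draw gets its own aligned slice of the upload heap, so earlier
        // matrices are not overwritten before the GPU reads them.
        const UINT64 offset =
            (m_deviceResources->GetCurrentFrameIndex() * drawsPerFrame + drawIndex)
            * c_alignedConstantBufferSize;
        memcpy(m_mappedConstantBuffer + offset,
            &m_constantBufferData, sizeof(m_constantBufferData));

        // Bind that slice for this draw.
        m_commandList->SetGraphicsRootConstantBufferView(
            0, m_constantBuffer->GetGPUVirtualAddress() + offset);
        m_commandList->DrawIndexedInstanced(6, 1, 0, 0, 0);
    }
}
Alternatively, keep the descriptor table and create one CBV per draw in the heap, offsetting the GPU descriptor handle for each draw call instead.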
We are developing software for slide show creation and use OpenGL.
We use FBO + PBO for fast data readback from the video card to RAM, but on some ATI video cards we face the following problems:
swapped RGB components
pixel shifting
There are no problems if we do not use PBO.
We have also noticed that a 4:3 aspect ratio for the PBO/FBO solves the pixel-shifting problem.
Any thoughts or suggestions?
Here are more details:
ATI Radeon HD 3650
PBO code:
public bool PBO_Initialize(
int bgl_size_w,
int bgl_size_h)
{
PBO_Release();
if (mCSGL12Control1 != null)
{
GL mGL = mCSGL12Control1.GetGL();
mCSGL12Control1.wgl_MakeCurrent();
//
// check PBO is supported by your video card
if (mGL.bglGenBuffersARB == true &&
mGL.bglBindBufferARB == true &&
mGL.bglBufferDataARB == true &&
mGL.bglBufferSubDataARB == true &&
mGL.bglMapBufferARB == true &&
mGL.bglUnmapBufferARB == true &&
mGL.bglDeleteBuffersARB == true &&
mGL.bglGetBufferParameterivARB == true)
{
mGL.glGenBuffersARB(2, _pbo_imageBuffers);
int clientHeight1 = bgl_size_h / 2;
int clientHeight2 = bgl_size_h - clientHeight1;
int clientSize1 = bgl_size_w * clientHeight1 * 4;
int clientSize2 = bgl_size_w * clientHeight2 * 4;
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[0]);
mGL.glBufferDataARB(GL.GL_PIXEL_PACK_BUFFER_ARB, clientSize1, IntPtr.Zero,
GL.GL_STREAM_READ_ARB);
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[1]);
mGL.glBufferDataARB(GL.GL_PIXEL_PACK_BUFFER_ARB, clientSize2, IntPtr.Zero,
GL.GL_STREAM_READ_ARB);
return true;
}
}
return false;
}
...
PBO read data back to memory
int clientHeight1 = _bgl_size_h / 2;
int clientHeight2 = _bgl_size_h - clientHeight1;
int clientSize1 = _bgl_size_w * clientHeight1 * 4;
int clientSize2 = _bgl_size_w * clientHeight2 * 4;
//mGL.glPushAttrib(GL.GL_VIEWPORT_BIT | GL.GL_COLOR_BUFFER_BIT);
// Bind two different buffer objects and start the glReadPixels
// asynchronously. Each call will return directly after
// starting the DMA transfer.
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[0]);
mGL.glReadPixels(0, 0, _bgl_size_w, clientHeight1, imageFormat,
pixelTransferMethod, IntPtr.Zero);
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[1]);
mGL.glReadPixels(0, clientHeight1, _bgl_size_w, clientHeight2, imageFormat,
pixelTransferMethod, IntPtr.Zero);
//mGL.glPopAttrib();
mGL.glBindFramebufferEXT(GL.GL_FRAMEBUFFER_EXT, 0);
// Process partial images. Mapping the buffer waits for
// outstanding DMA transfers into the buffer to finish.
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[0]);
IntPtr pboMemory1 = mGL.glMapBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB,
GL.GL_READ_ONLY_ARB);
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[1]);
IntPtr pboMemory2 = mGL.glMapBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB,
GL.GL_READ_ONLY_ARB);
System.Runtime.InteropServices.Marshal.Copy(pboMemory1, _bgl_rgbaData_out, 0, clientSize1);
System.Runtime.InteropServices.Marshal.Copy(pboMemory2, _bgl_rgbaData_out, clientSize1, clientSize2);
// Unmap the image buffers
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[0]);
mGL.glUnmapBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB);
mGL.glBindBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB, _pbo_imageBuffers[1]);
mGL.glUnmapBufferARB(GL.GL_PIXEL_PACK_BUFFER_ARB);
FBO initialization
private static void FBO_Initialize(GL mGL,
ref int[] bgl_texture,
ref int[] bgl_framebuffer,
ref int[] bgl_renderbuffer,
ref byte[] bgl_rgbaData,
int bgl_size_w,
int bgl_size_h)
{
// Texture
mGL.glGenTextures(1, bgl_texture);
mGL.glBindTexture(GL.GL_TEXTURE_2D, bgl_texture[0]);
mGL.glTexParameteri(GL.GL_TEXTURE_2D, GL.GL_TEXTURE_MAG_FILTER, GL.GL_NEAREST);
mGL.glTexParameteri(GL.GL_TEXTURE_2D, GL.GL_TEXTURE_MIN_FILTER, GL.GL_NEAREST);
mGL.glTexParameteri(GL.GL_TEXTURE_2D, GL.GL_TEXTURE_WRAP_S, GL.GL_CLAMP_TO_EDGE);
mGL.glTexParameteri(GL.GL_TEXTURE_2D, GL.GL_TEXTURE_WRAP_T, GL.GL_CLAMP_TO_EDGE);
IntPtr null_ptr = new IntPtr(0);
// <null> means reserve texture memory, but texels are undefined
mGL.glTexImage2D(GL.GL_TEXTURE_2D, 0, GL.GL_RGBA, bgl_size_w, bgl_size_h, 0, GL.GL_RGBA, GL.GL_UNSIGNED_BYTE, null_ptr);
//
mGL.glGenFramebuffersEXT(1, bgl_framebuffer);
mGL.glBindFramebufferEXT(GL.GL_FRAMEBUFFER_EXT, bgl_framebuffer[0]);
mGL.glGenRenderbuffersEXT(1, bgl_renderbuffer);
mGL.glBindRenderbufferEXT(GL.GL_RENDERBUFFER_EXT, bgl_renderbuffer[0]);
mGL.glRenderbufferStorageEXT(GL.GL_RENDERBUFFER_EXT, GL.GL_DEPTH_COMPONENT24, bgl_size_w, bgl_size_h);
mGL.glFramebufferTexture2DEXT(GL.GL_FRAMEBUFFER_EXT, GL.GL_COLOR_ATTACHMENT0_EXT,
GL.GL_TEXTURE_2D, bgl_texture[0], 0);
mGL.glFramebufferRenderbufferEXT(GL.GL_FRAMEBUFFER_EXT, GL.GL_DEPTH_ATTACHMENT_EXT,
GL.GL_RENDERBUFFER_EXT, bgl_renderbuffer[0]);
// Errors?
int status = mGL.glCheckFramebufferStatusEXT(GL.GL_FRAMEBUFFER_EXT);
if (status != GL.GL_FRAMEBUFFER_COMPLETE_EXT || mGL.glGetError() != GL.GL_NO_ERROR)
{
mGL.glFramebufferTexture2DEXT(GL.GL_FRAMEBUFFER_EXT, GL.GL_COLOR_ATTACHMENT0_EXT,
GL.GL_TEXTURE_2D, 0, 0);
mGL.glFramebufferRenderbufferEXT(GL.GL_FRAMEBUFFER_EXT, GL.GL_DEPTH_ATTACHMENT_EXT,
GL.GL_RENDERBUFFER_EXT, 0);
mGL.glBindTexture(GL.GL_TEXTURE_2D, 0);
mGL.glDeleteTextures(1, bgl_texture);
mGL.glBindRenderbufferEXT(GL.GL_RENDERBUFFER_EXT, 0);
mGL.glDeleteRenderbuffersEXT(1, bgl_renderbuffer);
mGL.glBindFramebufferEXT(GL.GL_FRAMEBUFFER_EXT, 0);
mGL.glDeleteFramebuffersEXT(1, bgl_framebuffer);
throw new Exception("Bad framebuffer.");
}
mGL.glDrawBuffer(GL.GL_COLOR_ATTACHMENT0_EXT);
mGL.glReadBuffer(GL.GL_COLOR_ATTACHMENT0_EXT); // For glReadPixels()
mGL.glBindFramebufferEXT(GL.GL_FRAMEBUFFER_EXT, 0);
mGL.glDrawBuffer(GL.GL_BACK);
mGL.glReadBuffer(GL.GL_BACK);
mGL.glBindTexture(GL.GL_TEXTURE_2D, 0);
bgl_rgbaData = new byte[bgl_size_w * bgl_size_h * 4];
}
It seems that re-installing/updating the video driver does solve this problem.
Really strange behaviour. It may be that the official notebook driver is old/buggy and causes the problem; updating to the latest driver from AMD for this VGA chip series seems to solve it. I'm also not sure whether the previous driver was set up correctly, which is why I say re-installing/updating.
Thank you all for the help.