Entering protected mode reboots - protected-mode

To understand how an operating system works, I have made a simple bootloader which loads a small test application for testing protected mode etc. After the boot sector has been loaded at 0x7c00, the bootloader loads the test code at segment 0x2000 and jumps to its first instruction. But when I try to enter protected mode, the system reboots. Can anyone help me with this problem?
This is my code at segment 0x2000
; Second-stage test program. It sets up a stack and the data segments at
; 2000h, loads a flat GDT, enables the A20 line through port 0x92, sets
; CR0.PE and far-jumps into a 32-bit halt loop.
BITS 16
; Entering_ProtectedMode:
cli ; no interrupts while SS:SP is being changed
mov ax, 2000h
mov ss, ax
mov sp, 0FFFFh ; stack grows down from 2000h:FFFFh
sti
cld
mov ax, 2000h ; DS/ES/FS/GS all use segment 2000h
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
;xor ax, ax
;mov ds, ax ; update data segment
cli ; clear interrupts
lgdt [gdtr] ; load GDT from GDTR (see gdt_32.inc); operand is read from DS:gdtr
call OpenA20Gate ; open the A20 gate
call EnablePMode ; jumps to ProtectedMode (never returns: EnablePMode ends in a far jmp)
; Sets bit 1 (A20) and clears bit 0 (INIT_NOW) in the value written to port 0x92.
OpenA20Gate:
in al, 0x93 ; switch A20 gate via fast A20 port 92 -- NOTE(review): this reads port 0x93 while the write below goes to 0x92; looks like a typo
or al, 2 ; set A20 Gate bit 1
and al, ~1 ; clear INIT_NOW bit
out 0x92, al
ret
; Sets CR0 bit 0 (PE) and far-jumps through the code descriptor.
EnablePMode:
mov eax, cr0
or eax, 1 ; CR0.PE = 1
mov cr0, eax
; NOTE(review): CODE_DESC has base 0, so the offset used here is taken as a
; linear address. ProtectedMode is an offset inside the segment this code runs
; from (DS = 2000h above), so presumably 0x20000 has to be added and a 32-bit
; far jump used -- confirm against the origin used when assembling.
jmp CODE_SEG : ProtectedMode
;*********************************
;* Global Descriptor Table (GDT) *
;*********************************
NULL_DESC:
dd 0 ; null descriptor
dd 0
; Code segment: base 0, limit 0xFFFFF pages (4 GiB), ring 0, execute/read, 32-bit.
CODE_DESC:
dw 0xFFFF ; limit low
dw 0 ; base low
db 0 ; base middle
db 10011010b ; access: present, DPL 0, code, readable
db 11001111b ; granularity: 4 KiB pages, 32-bit default size, limit bits 19:16 = 0xF
db 0 ; base high
; Data segment: same base/limit as the code segment, read/write.
DATA_DESC:
dw 0xFFFF ; limit low
dw 0 ; base low
db 0 ; base middle
db 10010010b ; access: present, DPL 0, data, writable
db 11001111b ; granularity: 4 KiB pages, 32-bit, limit bits 19:16 = 0xF
db 0 ; base high
gdtr:
dw gdtr - NULL_DESC - 1 ; length of GDT (size in bytes minus one)
; NOTE(review): LGDT expects the LINEAR address of the table. NULL_DESC is an
; assemble-time offset; if this code runs from segment 2000h with origin 0,
; the value stored here is 0x20000 too low -- confirm.
dd NULL_DESC ; base of GDT
CODE_SEG equ CODE_DESC - NULL_DESC ; selector 8 (GDT index 1)
DATA_SEG equ DATA_DESC - NULL_DESC ; selector 16 (GDT index 2); only referenced by the commented-out code below
;******************
;* Protected Mode *
;******************
BITS 32
ProtectedMode:
.halt:
jmp .halt ; spin forever once protected mode has been reached
;mov ax, 10h
;mov ds, ax ; update data segment

Try this one:
; Corrected A20 / mode-switch routines for the loader in the question.
; Assumes, as in the question, that the code is assembled with origin 0 and
; runs from segment 0x2000, i.e. at linear address 0x20000.
OpenA20Gate:
in al, 0x92 ; read fast-A20 port 0x92 (the question read 0x93 by mistake)
or al, 2 ; set A20 Gate bit 1
and al, ~1 ; clear INIT_NOW bit
out 0x92, al
ret
EnablePMode:
mov eax, cr0
or eax, 1 ; set CR0.PE
mov cr0, eax
; ......
;CODE_SEG must be equal 8 like "8:ProtectedMode". 8 points at the first descriptor of the GDT. 16 bit: 15-3 - index in the GDT (0 for the zero descriptor, 1 for the first descriptor - code, 2 for the second descriptor - data), 2 - the table indicator (0 for the GDT, 1 for the LDT), 1-0 - RPL (the request privilege level).
; CODE_DESC has base 0, so the far-jump offset must be the LINEAR address of
; ProtectedMode (0x20000 + its offset in the segment). That value does not fit
; in 16 bits, hence the 32-bit operand size ("jmp dword").
; For the same reason the base stored in gdtr must be linear as well:
;     dd 0x20000 + NULL_DESC ; base of GDT
jmp dword CODE_SEG:0x20000 + ProtectedMode

Related

Why is GCC subtracting 1 and comparing <= 2? Is cmp faster with powers of two in assembly?

I was writing some code to clear the screen to a particular color. C++ code:
// Fills a height-by-width block of 32-bit pixels starting at `memory` with
// `color`. Pixels are written contiguously, row after row; nothing is written
// when either dimension is zero or negative.
void clear_screen(unsigned int color, void *memory, int height, int width) {
    unsigned int *dst = static_cast<unsigned int *>(memory);
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            *dst = color;
            ++dst;
        }
    }
}
I used g++ and objconv to generate the corresponding assembly. This is what I got, and I've commented what I think some of the lines do too.
; Compiler output for clear_screen(color, memory, height, width).
; Arguments on entry: ecx = color, rdx = memory, r8d = height, r9d = width.
; Register roles in the loop: rcx = start of current row, r10d = row counter,
; ebx = height, rdi = row size in bytes, xmm0 = color in all four dword lanes.
renderer_clear_screen:
push r13 ; save the registers that are overwritten below and restored at _cls_return
push r12
push rbp
push rdi
push rsi
push rbx
mov r11d, ecx ; move the color into r11d
mov ebx, r8d ; move the height into ebx
mov rcx, rdx ; rcx = memory (start of the current row)  [000E _ 48: 89. D1]
test r8d, r8d ; height <= 0 ?
jle _cls_return ; basically, return if width or height is <= 0
test r9d, r9d ; width <= 0 ? ( window minimized )
jle _cls_return ;
mov r8d, r9d ; r8d = width (the height is kept in ebx from here on)
mov esi, r9d ; esi = width
mov edi, r9d ; edi = width
xor r10d, r10d ; r10d = 0 (row counter)
shr esi, 2 ; esi = width / 4 = number of full 4-pixel vectors per row
movd xmm1, r11d ; move the lower 32-bits of the color into xmm1
lea r12d, [r9-1] ; r12d = width - 1
shl rsi, 4 ; rsi = (width / 4) * 16 = bytes per row written by the vector loop  [003F _ 48: C1. E6, 04]
mov ebp, r8d ; ebp = r8d  [0043 _ 44: 89. C5]
shl rdi, 2 ; rdi = width * 4 = row size in bytes  [0046 _ 48: C1. E7, 02]
pshufd xmm0, xmm1, 0 ; broadcast the color to all four dwords of xmm0  [004A _ 66: 0F 70. C1, 00]
shl rbp, 2 ; rbp = r8d * 4 = byte offset where the scalar cleanup starts  [004F _ 48: C1. E5, 02]
ALIGN 8
; ---- per-row loop ----
?_001: cmp r12d, 2
jbe ?_006 ; unsigned: if (width - 1 <= 2) skip the vector loop, go to ?_006
mov rax, rcx ; rax = write pointer  [005E _ 48: 89. C8]
lea rdx, [rcx+rsi] ; rdx = end of the vectorized part of the row  [0061 _ 48: 8D. 14 31]
ALIGN 8
?_002: movups oword [rax], xmm0 ; store 4 pixels  [0068 _ 0F 11. 00]
add rax, 16 ; 006B _ 48: 83. C0, 10
cmp rdx, rax ; 006F _ 48: 39. C2
jnz ?_002 ; loop until the vector part is filled  [0072 _ 75, F4]
lea rdx, [rcx+rbp] ; rdx = address of the first cleanup pixel  [0074 _ 48: 8D. 14 29]
mov eax, r8d ; eax = x index of the first cleanup pixel  [0078 _ 44: 89. C0]
cmp r9d, r8d ; 007B _ 45: 39. C1
; As listed, r8d is an unmodified copy of r9d, so this jz is always taken.
; NOTE(review): the listing may have lost an instruction that rounds r8d down
; to a multiple of 4 before ebp is derived from it -- confirm against the
; actual compiler output.
jz ?_004 ; no leftover pixels in this row  [007E _ 74, 1C]
; ---- scalar cleanup: up to three single-pixel stores ----
?_003: lea r13d, [rax+1H] ; r13d = x + 1  [0080 _ 44: 8D. 68, 01]
mov dword [rdx], r11d ; 1st leftover pixel  [0084 _ 44: 89. 1A]
cmp r13d, r9d ; 0087 _ 45: 39. CD
jge ?_004 ; done if x + 1 >= width  [008A _ 7D, 10]
add eax, 2 ; eax = x + 2  [008C _ 83. C0, 02]
mov dword [rdx+4H], r11d ; 2nd leftover pixel  [008F _ 44: 89. 5A, 04]
cmp r9d, eax ; 0093 _ 41: 39. C1
jle ?_004 ; done if width <= x + 2  [0096 _ 7E, 04]
mov dword [rdx+8H], r11d ; 3rd leftover pixel  [0098 _ 44: 89. 5A, 08]
?_004: add r10d, 1 ; next row  [009C _ 41: 83. C2, 01]
add rcx, rdi ; advance the row pointer by one row  [00A0 _ 48: 01. F9]
cmp ebx, r10d ; 00A3 _ 44: 39. D3
jnz ?_001 ; loop while row counter != height  [00A6 _ 75, B0]
_cls_return:
pop rbx ;
pop rsi ;
pop rdi ;
pop rbp ;
pop r12 ;
pop r13 ; pop all the saved registers
ret ;
?_006: ; out-of-line path for rows of 1..3 pixels: cleanup starts at the row start
mov rdx, rcx ; rdx = start of row  [00B1 _ 48: 89. CA]
xor eax, eax ; x = 0  [00B4 _ 31. C0]
jmp ?_003 ; 00B6 _ EB, C8
Now, in ?_001, the compiler compares width - 1 to 2, which is the same thing as comparing the width to 3. My question is, with -O3, why did the compiler choose two instead of three, and waste a lea (to move width - 1 into r12d).
The only thing which makes sense to me is that powers of two are somehow faster to compare. Or maybe it's a compiler quirk?
The usual reason for GCC tweaking compare constants is to create smaller immediates, which helps it fit in an immediate of whatever width. Understanding gcc output for if (a>=3) / GCC seems to prefer small immediate values in comparisons. Is there a way to avoid that? (It always does it, instead of checking whether it's actually useful with this constant on the target ISA.) This heuristic works well for most ISAs, but sometimes not for AArch64 or ARM Thumb which can encode some immediates as a bit-range / bit-pattern, so it's not always the case that a smaller-magnitude number is better.
The width-1 is not part of that. The -1 is part of a range check to skip the auto-vectorized loop (16 bytes at a time with movups) and go straight to the cleanup, 1..3 scalar stores.
It seems to be checking width >= 1 && width <= 3, i.e. cleanup needed but total size is less than a full vector width. It's not equivalent to signed or unsigned width <= 3 for width=0. Note the unsigned compare: 0 - 1 is above 2U, because -1U is UINT_MAX.
But it already excluded width <= 0 with test r9d, r9d / jle _cls_return, so it would have been better for GCC to just check width <= 3U instead of doing extra work to exclude zero from the range-check. (An lea, and save/restore of R12 which isn't otherwise used!)
(The cleanup also looks over-complicated: it could, e.g., use movq [rdx], xmm0 when more than 1 uint is needed, instead of the weird branching around for various cases. And even better, if the total size is >= 4 uints, just do another movups that ends at the end of the range, possibly overlapping with previous stores.)
Yes, this is a missed optimization, you can report it on https://gcc.gnu.org/bugzilla/enter_bug.cgi?product=gcc (now that you know it's a missed optimization; it's good that you asked here first instead of filing a bug without first figuring out if the instruction could be avoided.)
The only thing which makes sense to me is that powers of two are somehow faster to compare.
No, it's not faster; cmp performance is not data-dependent at all. (No integer instructions are, except sometimes [i]div. And on AMD CPUs before Zen3, pext / pdep. But anyway, not simple integer add/compare/shift stuff. See https://uops.info/).
And BTW, we can reproduce your GCC asm output on Godbolt by telling it this function is __attribute__((ms_abi)), or there's a command-line option to set the calling convention default. (It's really only useful for looking at the asm; it's still using GNU/Linux headers and x86-64 System V type widths like 64-bit long. Only a proper MinGW (cross-)compiler could show you what GCC would really do when targeting Windows.)
It's GAS .intel_syntax noprefix, which is MASM-like, not NASM, but the difference would only be obvious with addressing modes involving global variables.

Hanging of XShmPutImage event notification

I am using XShm extension to draw and manipulate images in Linux.
To avoid screen flickering, I am passing send_event = TRUE to XShmPutImage and then waiting for the event with XIfEvent, immediately after the call to XShmPutImage.
This way, I am making the image drawing blocking in order to not changing the image until it is displayed on the window surface.
Usually everything works fine. But sometimes, when I have intensive image drawing, it seems that the event never comes and the drawing procedure hangs.
Where should I look for the problem? Is using XIfEvent appropriate for this task? How can the event disappear from the message queue?
Is it possible XShmPutImage to not send event (if send_event = TRUE) or to send event different than ShmCompletion on some circumstances? (for example on some internal error or something?)
EDIT:
After some more research, I found that such hangs happen only when the window manager intensively generates events for the window, for example when I resize the window by dragging its corners.
EDIT2:
I tried several ways to solve this problem, but without success. At the end I was forced to use some timeout and to cancel waiting after some time. But of course this is dirty hack and I want to fix it anyway.
So, what can be the reason XShmPutImage to not send event if send_event=TRUE or is it possible this event to disappear from the message queue?
EDIT3:
Here is the questionable code (FASM):
cinvoke XShmPutImage, ......, TRUE
.loop:
lea eax, [.event]
cinvoke XCheckTypedEvent, [Display], [ShmCompletionEvent], eax
test eax, eax
jz .loop ; there is no message
NB: XShmPutImage always return TRUE, regardless whether the event check hangs or not, so I didn't put error check after it.
EDIT4:
Because of request I am posting the whole code of the drawing function. The code uses some macro libraries of FASM, but at least the ideas are clear (I hope)
Notice that this code contains workaround code that limits the event waiting for only 20ms. Without this timeout the waiting loop simply hangs forever. The number of the XShm event is acquired by calling XShmGetEventBase as recommended in the Xshm documentation.
; Draws the image on a OS provided window surface.
; Puts a rectangle of the image .pImage onto the drawable .where with
; XShmPutImage (send_event = TRUE), then polls for the ShmCompletionEvent
; for at most 20 timestamp units before returning.
; All general registers are preserved (pushad/popad).
proc DrawImageRect, .where, .pImage, .xDst, .yDst, .xSrc, .ySrc, .width, .height
.event XEvent ; local buffer that receives the completion event
rb 256
begin
pushad
mov esi, [.pImage] ; esi = pointer to TImage
test esi, esi
jz .exit ; no image -> nothing to draw
mov ebx, [esi+TImage.ximage] ; ebx is not read again in this procedure
cinvoke XCreateGC, [hApplicationDisplay], [.where], 0, 0
mov edi, eax ; edi = the new GC, freed at .finish
; NOTE(review): the rectangle is passed through unchecked; it is not compared
; with TImage.width/TImage.height here.
cinvoke XShmPutImage, [hApplicationDisplay], [.where], edi, [esi+TImage.ximage], [.xSrc], [.ySrc], [.xDst], [.yDst], [.width], [.height], TRUE
stdcall GetTimestamp
lea esi, [eax+20] ; 20ms timeout: esi = deadline (the image pointer is no longer needed)
.loop:
lea eax, [.event]
cinvoke XCheckTypedEvent, [hApplicationDisplay], [ShmCompletionEvent], eax
test eax, eax
jnz .finish ; completion event received
stdcall GetTimestamp
cmp eax, esi
jb .loop ; keep polling until the deadline (unsigned compare)
.finish:
cinvoke XFreeGC, [hApplicationDisplay], edi
.exit:
popad
return
endp
And here is the code of the main event loop of the application.
The procedure __ProcessOneSystemEvent simply dispatches the events to the GUI objects and ignores all events it does not use. It does not process ShmCompletionEvent at all.
All the windows created in the application have events mask of:
ExposureMask+FocusChangeMask+KeyPressMask+KeyReleaseMask+ButtonPressMask+ButtonReleaseMask+EnterWindowMask+LeaveWindowMask+PointerMotionMask+StructureNotifyMask
proc ProcessSystemEvents
.event XEvent
rb 256
begin
push ebx ecx edx
.event_loop:
; check for quit
get eax, [pApplication], TApplication:MainWindow
test eax, eax
jz .continue
cmp dword [eax], 0
jne .continue
cinvoke XFlush, [hApplicationDisplay]
xor eax, eax
mov [fGlobalTerminate], 1
stc
pop edx ecx ebx
return
.continue:
cinvoke XPending, [hApplicationDisplay]
test eax, eax
jz .noevents
push edi ecx
lea edi, [.event]
mov ecx, sizeof.XEvent/4
xor eax, eax
rep stosd
pop ecx edi
lea ebx, [.event]
cinvoke XNextEvent, [hApplicationDisplay], ebx
stdcall __ProcessOneSystemEvent, ebx
jmp .event_loop
.noevents:
clc
pop edx ecx ebx
return
endp
The full source code is available in the repository but it is a very big project not easy for navigation. The discussed source is in check-in 8453c99b1283def8.
The files: "freshlib/graphics/images.asm" and "freshlib/graphics/Linux/images.asm" are about the image drawing.
The files: "freshlib/gui/Main.asm" and "freshlib/gui/Linux/Main.asm" are about the general events handling in the application.
What is the X server doing?
The X server can and will suppress a ShmCompletionEvent if the parameters passed to XShmPutImage exceed the geometry of the shared memory area attached to the XImage in the call. The server checks X/Y and width/height against the previously stored limits for the given shared area and if the call parameters are out-of-bounds, the server will return BadValue, suppress the drawing operation, and suppress the completion event.
The above is exactly what is happening in your library. Here's how:
The main event dispatcher routine is ProcessSystemEvents. It performs an XNextEvent and passes the event to __ProcessOneSystemEvent, which, based on the event type, uses the jump table .jump_table to dispatch to an event-specific handler function.
The event specific function for an Expose event is .expose
The .expose function will, in turn, call DrawImageRect using X/Y and width/height values from the XExposeEvent struct. This is wrong and is the true source of the bug, as we shall see momentarily.
DrawImageRect will pass these values along in a call to XShmPutImage
The handler for XShmPutImage in the X server will examine these parameters and reject if they're out of bounds.
The parameters are being rejected because they come from an exposure event and are related to the geometry of the window and not the geometry of the shared memory attached to the XImage used in the XShmPutImage call.
Specifically, suppose the window has just been resized (e.g. by the window manager) and has been enlarged, and there has been a prior ConfigureNotify event for the resize. Now a new Expose event will carry a larger width/height that exceeds the width/height of the shared memory area that the server knows about.
It is the responsibility of the client to field the window resize events [etc.] and teardown/recreate the shared memory area with the enlarged size. This is not being done and is the source of the bug.
NOTE: Just to be completely clear on this, the server can only report on the error and not do anything about it for a few reasons:
The server knows about the window [and its resize].
It knows about the XImage, its shared memory area and size
But they're only associated during the XShmPutImage call [AFAIK]
Even if the server could associate them, it couldn't adjust the shmarea
That's because it has no way to relink the shmarea to the client side
Only the client can do that via XShmDetach/XShmAttach
Below are redacted versions of the relevant source files from the c5c765bc7e commit. They have been cleaned up a bit, so only the most germane parts remain. Some lines have been truncated or wrapped to eliminate horizontal scrolling.
The files have been annotated with NOTE and NOTE/BUG which I did while analyzing them.
gui/Main.asm The top level generic main loop. Not much to see here.
; FILE: gui/Main.asm
; _____________________________________________________________________________
;| |
;| ..::FreshLib::.. Free, open source. Licensed under "BSD 2-clause" license." |
;|_____________________________________________________________________________|
;
; Description: Main procedure of GUI application library.
;
; Target OS: Any
;
; Dependencies:
;
; Notes: Organize the main message/event loop needed by every GUI engine.
; This file contains only OS independent part and includes OS dependent
; files.
;______________________________________________________________________________
module "Main library"
proc Run
begin
.mainloop:
stdcall ProcessSystemEvents
jc .terminate
mov eax, [pApplication]
test eax, eax
jz .eventok
get ecx, eax, TApplication:OnIdle
jecxz .eventok
stdcall ecx, eax
.eventok:
stdcall WaitForSystemEvent
jmp .mainloop
.terminate:
DebugMsg "Terminate GUI application!"
return
endp
include '%TargetOS%/Main.asm'
endmodule
gui/Linux/Main.asm The event handlers
; FILE: gui/Linux/Main.asm
; _____________________________________________________________________________
;| |
;| ..::FreshLib::.. Free, open source. Licensed under "BSD 2-clause" license." |
;|_____________________________________________________________________________|
;
; Description: Main procedure of GUI application library.
;
; Target OS: Linux
;
; Dependencies:
;
; Notes: Organize the main message/event loop needed by every GUI engine.
;______________________________________________________________________________
body ProcessSystemEvents
; NOTE: this is the storage for the dequeued event -- all dispatch routines
; should use it and process it
.event XEvent
rb 256
begin
push ebx ecx edx
.event_loop:
; check for quit
get eax, [pApplication], TApplication:MainWindow
test eax, eax
jz .continue ; ???????????
cmp dword [eax], 0
jne .continue
cinvoke XFlush, [hApplicationDisplay]
xor eax, eax
mov [fGlobalTerminate], 1
stc
pop edx ecx ebx
return
; NOTE: it is wasteful for the main loop to call WaitForSystemEvent, then call
; us and we do XPending on the first loop -- we already know we have at least
; one event waiting in the queue
.continue:
cinvoke XPending, [hApplicationDisplay]
test eax, eax
jz .noevents
push edi ecx
lea edi, [.event]
mov ecx, sizeof.XEvent/4
xor eax, eax
rep stosd
pop ecx edi
lea ebx, [.event]
cinvoke XNextEvent, [hApplicationDisplay], ebx
stdcall __ProcessOneSystemEvent, ebx
jmp .event_loop
.noevents:
clc
pop edx ecx ebx
return
endp
body WaitForSystemEvent
.event XEvent
begin
push eax ecx edx
lea eax, [.event]
cinvoke XPeekEvent, [hApplicationDisplay], eax
pop edx ecx eax
return
endp
proc __ProcessOneSystemEvent, .linux_event
begin
pushad
mov ebx, [.linux_event]
; mov eax, [ebx+XEvent.type]
; cmp eax, [ShmCompletionEvent]
; je .shm_completion
stdcall _GetWindowStruct, [ebx+XEvent.window]
jc .notprocessed
test eax, eax
jz .notprocessed
mov esi, eax
mov ecx, [ebx+XEvent.type]
cmp ecx, LASTEvent
jae .notprocessed
mov ecx, [.jump_table+4*ecx]
jecxz .notprocessed
jmp ecx
.notprocessed:
popad
stc
return
.finish:
popad
clc
return
;.shm_completion:
; DebugMsg "Put back completion event!"
;
; int3
; cinvoke XPutBackEvent, [hApplicationDisplay], ebx
; jmp .finish
;.........................................................................
; seMove and seResize events.
;-------------------------------------------------------------------------
; ConfigureNotify handler.
; In:  ebx = pointer to the XConfigureEvent, esi = pointer to the TWindow.
; Collapses all queued ConfigureNotify events for the window into the last
; one, then raises EventResize if the size differs from the stored one and
; EventMove if the position differs from the stored one.
.moveresize:
; NOTE/BUG!!!!: we must not only process a resize/move request, but we must also
; adjust the size of the shmarea attached to the XImage -- that is _not_ being
; done. (e.g.) if the window is enlarged, the shmarea must be enlarged
cinvoke XCheckTypedWindowEvent, [hApplicationDisplay],
[ebx+XConfigureEvent.window], ConfigureNotify, ebx
test eax, eax
jnz .moveresize ; a newer event was copied into [ebx]; look for more
; resize event...
mov eax, [esi+TWindow._width]
mov edx, [esi+TWindow._height]
cmp eax, [ebx+XConfigureEvent.width]
jne .resize
cmp edx, [ebx+XConfigureEvent.height]
je .is_move ; size unchanged
.resize:
exec esi, TWindow:EventResize, [ebx+XConfigureEvent.width],
[ebx+XConfigureEvent.height]
; move event...
.is_move:
mov eax, [esi+TWindow._x]
mov edx, [esi+TWindow._y]
cmp eax, [ebx+XConfigureEvent.x]
jne .move
; Compare the stored Y (edx) with the event's Y. The previous code compared
; eax (the stored X) here, so a purely vertical move could be missed and an
; unchanged position could be reported as a move.
cmp edx, [ebx+XConfigureEvent.y]
je .finish ; position unchanged
.move:
exec esi, TWindow:EventMove,
[ebx+XConfigureEvent.x], [ebx+XConfigureEvent.y]
jmp .finish
;.........................................................................
; DestroyNotify handler it invalidates the handle in TWindow structure and
; then destroys TWindow.
.destroy:
test esi, esi
jz .finish
mov [esi+TWindow.handle], 0
destroy esi
jmp .finish
;.........................................................................
; Window paint event
.expose:
get edi, esi, TWindow:ImgScreen
; NOTE:BUG!!!!!
;
; if the window has been resized (e.g. enlarged), these values are wrong!
; they relate to the _window_ but _not_ the shmarea that is attached to the
; XImage
;
; however, DrawImageRect will call XShmPutImage with these values, they
; will exceed the geometry of what the X server knows about the shmarea and
; it will return BadValue and _suppress_ the completion event for XShmPutImage
stdcall DrawImageRect, [esi+TWindow.handle], edi,
[ebx+XExposeEvent.x],[ebx+XExposeEvent.y],
[ebx+XExposeEvent.x], [ebx+XExposeEvent.y],
[ebx+XExposeEvent.width], [ebx+XExposeEvent.height]
jmp .finish
;.........................................................................
; Mouse event handlers
.mousemove:
cinvoke XCheckTypedWindowEvent, [hApplicationDisplay],
[ebx+XConfigureEvent.window], MotionNotify, ebx
test eax, eax
jnz .mousemove
stdcall ServeMenuMouseMove, [ebx+XMotionEvent.window],
[ebx+XMotionEvent.x], [ebx+XMotionEvent.y],
[ebx+XMotionEvent.state]
jc .finish
cinvoke XCheckTypedWindowEvent, [hApplicationDisplay],
[ebx+XMotionEvent.window], MotionNotify, ebx
test eax, eax
jnz .mousemove
mov edi, [__MouseTarget]
test edi, edi
jz .search_target_move
stdcall __GetRelativeXY, edi, [ebx+XMotionEvent.x], [ebx+XMotionEvent.y]
jmp .target_move
.search_target_move:
exec esi, TWindow:ChildByXY, [ebx+XMotionEvent.x],
[ebx+XMotionEvent.y], TRUE
mov edi, eax
.target_move:
cmp edi, [__LastPointedWindow]
je .move_event
cmp [__LastPointedWindow], 0
je .leave_ok
exec [__LastPointedWindow], TWindow:EventMouseLeave
.leave_ok:
mov [__LastPointedWindow], edi
exec edi, TWindow:EventMouseEnter
.move_event:
exec edi, TWindow:EventMouseMove, ecx, edx, [ebx+XMotionEvent.state]
jmp .finish
;.........................................................................
; event jump table
; Handler addresses indexed by X event type; 0 = event type is ignored.
; The dispatcher accepts every type below LASTEvent, so the table needs one
; entry for each of those values.
.jump_table dd 0 ; event 0
dd 0 ; event 1
dd .key_press ; KeyPress = 2
dd .key_release ; KeyRelease = 3
dd .mouse_btn_press ; ButtonPress = 4
dd .mouse_btn_release ; ButtonRelease = 5
dd .mousemove ; MotionNotify = 6
dd 0 ; EnterNotify = 7
dd 0 ; LeaveNotify = 8
dd .focusin ; FocusIn = 9
dd .focusout ; FocusOut = 10
dd 0 ; KeymapNotify = 11
dd .expose ; Expose = 12
dd 0 ; GraphicsExpose = 13
dd 0 ; NoExpose = 14
dd 0 ; VisibilityNotify = 15
dd 0 ; CreateNotify = 16
dd .destroy ; DestroyNotify = 17
dd 0 ; UnmapNotify = 18
dd 0 ; MapNotify = 19
dd 0 ; MapRequest = 20
dd 0 ; ReparentNotify = 21
dd .moveresize ; ConfigureNotify = 22
dd 0 ; ConfigureRequest = 23
dd 0 ; GravityNotify = 24
dd 0 ; ResizeRequest = 25
dd 0 ; CirculateNotify = 26
dd 0 ; CirculateRequest = 27
dd 0 ; PropertyNotify = 28
dd 0 ; SelectionClear = 29
dd 0 ; SelectionRequest = 30
dd 0 ; SelectionNotify = 31
dd 0 ; ColormapNotify = 32
dd .clientmessage ; ClientMessage = 33
dd .mapping_notify ; MappingNotify = 34
; X.h defines GenericEvent = 35 and LASTEvent = 36. With that value of
; LASTEvent the bounds check lets index 35 through, so it must be present in
; the table (harmless padding if the project's LASTEvent is 35 -- TODO confirm).
dd 0 ; GenericEvent = 35
graphics/Linux/images.asm The image drawing code [including the DrawImageRect function] and the shared memory create/destroy code.
; FILE: graphics/Linux/images.asm
; _____________________________________________________________________________
;| |
;| ..::FreshLib::.. Free, open source. Licensed under "BSD 2-clause" license." |
;|_____________________________________________________________________________|
;
; Description: Memory based images manipulation library.
;
; Target OS: Linux
;
; Dependencies: memory.asm
;
; Notes:
;______________________________________________________________________________
uses libX11, xshm
struct TImage
.width dd ? ; width in pixels.
.height dd ? ; height in pixels.
.pPixels dd ? ; pointer to the pixel memory.
; os dependent data
.ximage dd ?
.shminfo XShmSegmentInfo
ends
; Creates a TImage backed by a System V shared memory segment and attaches
; the segment to the X server through the MIT-SHM extension.
;
; Arguments: .width, .height - requested size in pixels; values <= 0 are
;            replaced by 1.
; Returns:   CF = 0 and eax = pointer to the new TImage on success;
;            CF = 1 on failure (eax is then restored by popad).
; All other general registers are preserved.
body CreateImage
begin
pushad
stdcall GetMem, sizeof.TImage
jc .finish ; allocation failed; CF stays set through popad
mov esi, eax ; esi = the new TImage
; Clamp both dimensions to a minimum of 1.
xor eax, eax
inc eax
mov ecx, [.width]
mov edx, [.height]
cmp ecx, 0
cmovle ecx, eax
cmp edx, 0
cmovle edx, eax
mov [esi+TImage.width], ecx
mov [esi+TImage.height], edx
; Segment size = width * height * 4 bytes.
lea eax, [4*ecx]
imul eax, edx
cinvoke shmget, IPC_PRIVATE, eax, IPC_CREAT or 777o
test eax, eax
js .error ; negative result: no segment was created
mov [esi+TImage.shminfo.ShmID], eax
cinvoke shmat, eax, 0, 0
cmp eax, -1
je .error_free ; the segment exists but could not be mapped
mov [esi+TImage.shminfo.Addr], eax
mov [esi+TImage.pPixels], eax
mov [esi+TImage.shminfo.fReadOnly], 1
lea ebx, [esi+TImage.shminfo]
; eax still holds the mapped address here; it is passed as the image data.
cinvoke XShmCreateImage, [hApplicationDisplay], 0, $20, ZPixmap, eax,
ebx, [esi+TImage.width], [esi+TImage.height]
mov [esi+TImage.ximage], eax
cinvoke XShmAttach, [hApplicationDisplay], ebx
clc
mov [esp+4*regEAX], esi ; patch the saved eax so popad returns the TImage
.finish:
popad
return
.error_free:
; Remove the segment created above. The id is read through esi: ebx is only
; loaded after a successful shmat, so it is not valid on this path.
cinvoke shmctl, [esi+TImage.shminfo.ShmID], IPC_RMID, 0
.error:
stdcall FreeMem, esi
stc
jmp .finish
endp
; Releases everything a TImage holds: the server-side attachment, the XImage,
; the local mapping, the shared memory segment and finally the TImage itself.
; Argument: .ptrImage - pointer to the TImage; a null pointer is ignored.
; All general registers are preserved (pushad/popad).
body DestroyImage
begin
pushad
mov esi, [.ptrImage]
test esi, esi
jz .finish ; nothing to destroy
lea eax, [esi+TImage.shminfo]
cinvoke XShmDetach, [hApplicationDisplay], eax ; detach the segment from the X server
cinvoke XDestroyImage, [esi+TImage.ximage]
cinvoke shmdt, [esi+TImage.shminfo.Addr] ; unmap the segment from this process
cinvoke shmctl, [esi+TImage.shminfo.ShmID], IPC_RMID, 0 ; mark the segment for removal
stdcall FreeMem, esi
.finish:
popad
return
endp
;if used ___CheckCompletionEvent
;___CheckCompletionEvent:
;
;virtual at esp+4
; .display dd ?
; .pEvent dd ?
; .user dd ?
;end virtual
;
;; timeout
; stdcall GetTimestamp
; cmp eax, [.user]
; jbe #f
;
; DebugMsg "Timeout!"
;
; mov eax, 1
; retn
;
;##:
; mov eax, [.pEvent] ;.pEvent
; mov eax, [eax+XEvent.type]
;
; cmp eax, [ShmCompletionEvent]
; sete al
; movzx eax, al
; retn
;end if
body DrawImageRect
.event XEvent
rb 256
begin
pushad
mov esi, [.pImage]
test esi, esi
jz .exit
mov ebx, [esi+TImage.ximage]
; NOTE: is this necessary? it seems wasteful to create and destroy a GC
; repeatedly. Dunno, does this _have_ to be done here, _every_ time?
cinvoke XCreateGC, [hApplicationDisplay], [.where], 0, 0
mov edi, eax
; NOTE/BUG: The return ShmCompletionEvent will be suppressed due to a BadValue
; if the X/Y and width/height parameters given to us by caller exceed the
; geometry/range of the shmarea attached to .ximage
;
; the routine that calls us is .expose and it _is_ giving us bad values. it is
; passing us X/Y width/height related to an exposure event of the .where
; _window_ which we put in the call. The X server will compare these against
; the size of the shmarea of TImage.xmage and complain if we exceed the bounds
cinvoke XShmPutImage, [hApplicationDisplay], [.where], edi,
[esi+TImage.ximage], [.xSrc], [.ySrc], [.xDst], [.yDst],
[.width], [.height], TRUE
; NOTE/BUG: this code should _not_ be looping on XCheckTypedEvent because it
; disrupts the normal event processing. if we want to be "synchronous" on this
; we should loop on the main event dispatcher (ProcessSystemEvents) and let it
; dispatch to a callback we create. we can set a "pending" flag that our [not
; yet existent] dispatch routine can clear
; THIS CODE SOMETIMES CAUSES HANGS!
stdcall GetTimestamp
lea esi, [eax+20]
.loop:
lea eax, [.event]
cinvoke XCheckTypedEvent, [hApplicationDisplay], [ShmCompletionEvent],
eax
test eax, eax
jnz .finish
stdcall GetTimestamp
cmp eax, esi
jb .loop
.finish:
cinvoke XFreeGC, [hApplicationDisplay], edi
.exit:
popad
return
endp
Xext/shm.c The X server code that checks and processes the XShmPutImage call.
// FILE: Xext/shm.c
static int
ProcShmPutImage(ClientPtr client)
{
GCPtr pGC;
DrawablePtr pDraw;
long length;
ShmDescPtr shmdesc;
REQUEST(xShmPutImageReq);
REQUEST_SIZE_MATCH(xShmPutImageReq);
VALIDATE_DRAWABLE_AND_GC(stuff->drawable, pDraw, DixWriteAccess);
VERIFY_SHMPTR(stuff->shmseg, stuff->offset, FALSE, shmdesc, client);
// NOTE: value must be _exactly_ 0/1
if ((stuff->sendEvent != xTrue) && (stuff->sendEvent != xFalse))
return BadValue;
if (stuff->format == XYBitmap) {
if (stuff->depth != 1)
return BadMatch;
length = PixmapBytePad(stuff->totalWidth, 1);
}
else if (stuff->format == XYPixmap) {
if (pDraw->depth != stuff->depth)
return BadMatch;
length = PixmapBytePad(stuff->totalWidth, 1);
length *= stuff->depth;
}
else if (stuff->format == ZPixmap) {
if (pDraw->depth != stuff->depth)
return BadMatch;
length = PixmapBytePad(stuff->totalWidth, stuff->depth);
}
else {
client->errorValue = stuff->format;
return BadValue;
}
// NOTE/BUG: The following block is the "check parameters" code. If the
// given drawing parameters of the request (e.g. X, Y, width, height) [or
// combinations thereof] exceed the geometry/size of the shmarea, the
// BadValue error is being returned here and the code to send a return
// event will _not_ be executed. The bug isn't really here, it's on the
// client side, but it's the client side bug that causes the event to be
// suppressed
/*
* There's a potential integer overflow in this check:
* VERIFY_SHMSIZE(shmdesc, stuff->offset, length * stuff->totalHeight,
* client);
* the version below ought to avoid it
*/
if (stuff->totalHeight != 0 &&
length > (shmdesc->size - stuff->offset) / stuff->totalHeight) {
client->errorValue = stuff->totalWidth;
return BadValue;
}
if (stuff->srcX > stuff->totalWidth) {
client->errorValue = stuff->srcX;
return BadValue;
}
if (stuff->srcY > stuff->totalHeight) {
client->errorValue = stuff->srcY;
return BadValue;
}
if ((stuff->srcX + stuff->srcWidth) > stuff->totalWidth) {
client->errorValue = stuff->srcWidth;
return BadValue;
}
if ((stuff->srcY + stuff->srcHeight) > stuff->totalHeight) {
client->errorValue = stuff->srcHeight;
return BadValue;
}
// NOTE: this is where the drawing takes place
if ((((stuff->format == ZPixmap) && (stuff->srcX == 0)) ||
((stuff->format != ZPixmap) &&
(stuff->srcX < screenInfo.bitmapScanlinePad) &&
((stuff->format == XYBitmap) ||
((stuff->srcY == 0) &&
(stuff->srcHeight == stuff->totalHeight))))) &&
((stuff->srcX + stuff->srcWidth) == stuff->totalWidth))
(*pGC->ops->PutImage) (pDraw, pGC, stuff->depth,
stuff->dstX, stuff->dstY,
stuff->totalWidth, stuff->srcHeight,
stuff->srcX, stuff->format,
shmdesc->addr + stuff->offset +
(stuff->srcY * length));
else
doShmPutImage(pDraw, pGC, stuff->depth, stuff->format,
stuff->totalWidth, stuff->totalHeight,
stuff->srcX, stuff->srcY,
stuff->srcWidth, stuff->srcHeight,
stuff->dstX, stuff->dstY, shmdesc->addr + stuff->offset);
// NOTE: this is where the return event gets sent
if (stuff->sendEvent) {
xShmCompletionEvent ev = {
.type = ShmCompletionCode,
.drawable = stuff->drawable,
.minorEvent = X_ShmPutImage,
.majorEvent = ShmReqCode,
.shmseg = stuff->shmseg,
.offset = stuff->offset
};
WriteEventsToClient(client, 1, (xEvent *) &ev);
}
return Success;
}
Your source code will be the ultimate piece which we can analyse but since I understand Assembly very less, I will give you an answer on macro level. Exact answer is still unknown to me.
The issue appears only when there are too many events, not with the normal flow of events, which suggests that your framework is running out of virtual memory, or that another event frame is triggered before the previous one releases its memory. In this kind of situation you can do a few things:
Try to check if there is any memory leak or not.. After one frame of event is over, try to clean up the memory or end that frame object properly before triggering the new one.
You can also develop a mechanism to make the second frame wait for the first frame to get over. In C/C++ we do it using so many synchronization methods like Mutex or select system calls. If you design follows that kind of pattern, then you can do these
If anywhere you have the authority to change the allocated memory given to your window, try to increase it. Because one thing for sure (according to your explanation ) that this is some memory issue.
Replying on Edit 3 It looks like you are calling some method cinvoke. How it is internally handling the even is unknown to me. Why don't you implement it directly in C. I am sure for whatever target that you are working, you will get some Cross-compiler.

Make beep sound in BIOS

When computer starts to boot, It makes a beep sound from the BIOS Speaker.
How can I do this in Assembly or C++?
Clearly I want to make Beep Sound by BIOS Speaker.
Remember i mean BIOS Speakers
Does it have any interrupt for that ? I searched about that but nothing found..
I tried some interrupts, but they didn't do this. For example, the following code:
// Writes the ASCII BEL control character ("\a") to standard output.
int main()
{
    cout << "\a";
    return 0;
}
produced the sound from the regular speakers, not from the BIOS speaker.
How can I do this? Is there an interrupt for it?
Try to add this code, too.
; Busy-wait: one outer pass per count in bx, each pass spinning cx down
; from 65535 to 0 (bx must already hold the pass count on entry).
.delay_outer:
    mov cx, 0FFFFh
.delay_inner:
    dec cx
    jnz .delay_inner
    dec bx
    jnz .delay_outer
; Turn the note off: read port 61h, clear bits 1 and 0, write it back.
    in al, 61h
    and al, 0FCh
    out 61h, al
So, the result is:
void beep(){
__asm{
MOV al, 182 ; Prepare the speaker for the
out 43h, al ; note.
mov ax, 2280 ; Frequency number (in decimal)
; for C.
out 42h, al ; Output low byte.
mov al, ah ; Output high byte.
out 42h, al
in al, 61h ; Turn on note (get value from
; port 61h).
or al, 00000011b ; Set bits 1 and 0.
out 61h, al ; Send new value.
mov bx, 4 ; Pause for duration of note.
.pause1:
mov cx, 65535
.pause2:
dec cx
jne .pause2
dec bx
jne .pause1
in al, 61h ; Turn off note (get value from
; port 61h).
and al, 11111100b ; Reset bits 1 and 0.
out 61h, al ; Send new value.
};
}
The only way you can implement this in any modern Windows OS is, I guess, by writing a kernel-mode driver. The reason is that the in and out instructions are not available in user mode, and there is no API available for the beeper.
http://en.wikipedia.org/wiki/Protection_ring
https://msdn.microsoft.com/en-us/library/windows/hardware/ff554836(v=vs.85).aspx
However, if you're just willing to dig into low-level programming, consider writing own bootloader or even own BIOS (using virtual machine).
Try to include this procedure in your C++ program.
void beep(){
__asm{
MOV al, 182 ; Prepare the speaker for the
out 43h, al ; note.
mov ax, 2280 ; Frequency number (in decimal)
; for C.
out 42h, al ; Output low byte.
mov al, ah ; Output high byte.
out 42h, al
in al, 61h ; Turn on note (get value from
; port 61h).
or al, 00000011b ; Set bits 1 and 0.
out 61h, al ; Send new value.
mov bx, 4 ; Pause for duration of note.
};
}

Finding Memory Size in Boot without DOS, Windows, Linux

I am writing a simple program in Assembly (NASM). When the boot sector loads, it has to display the total amount of memory (RAM) installed in the computer, in megabytes. There would be no operating system (DOS, Windows, Linux) when the boot sector is loaded, so how would I find out the total RAM size? I have 2 GB of RAM in my computer. I have searched a lot on the internet but could not find a solution.
Is there a BIOS interrupt that reports a memory size of 2 GB? There is an interrupt that was used in old computers to report memory, but it does not report all 2 GB. I checked, and there is no solution for this in Ralf Brown's Interrupt List. Maybe someone knows a lot more about the BIOS. If the BIOS does not provide this facility, can I use C/C++ to find out the total RAM size, and call the C/C++ code from assembly? Which C/C++ function would be used to find the total RAM size?
Remember that my assembly code will do a cold boot and there would be no operating system to provide any facility to my code.
EDITED:
I read the website http://wiki.osdev.org/Detecting_Memory_%28x86%29. And decided to check if int 15 works. So I got the code from this website and edited it to test if int 15 EAX = E820 works. But it fails to work and the output is 'F' in .failed1. 'F' is test case which I made to check for "unsupported function". Test Cases are 'F', 'G' and 'H'. Here is the code.
; use the INT 0x15, eax= 0xE820 BIOS function to get a memory map
; inputs: es:di -> destination buffer for 24 byte entries
;         ds    -> segment containing mmap_ent
; outputs: bp = entry count, trashes all registers except esi
;          [mmap_ent] = entry count (word)
;          CF clear on success, CF set on failure
; side effects: prints one character through INT 0x10 teletype output:
;          success -> '0' + low byte of the entry count (so counts above 9
;                     show up as the ASCII characters following '9')
;          failure -> 'F' (first call returned carry: function unsupported)
;                     'G' (eax did not come back as "SMAP")
;                     'H' (ebx = 0 after the first call: one-entry list)
E820_SMAP       equ 0x0534D4150         ; "SMAP" signature
E820_ENTRY_SIZE equ 24                  ; bytes requested per entry

do_e820:
        xor     ebx, ebx                ; ebx must be 0 to start
        xor     bp, bp                  ; keep an entry count in bp
        mov     edx, E820_SMAP          ; Place "SMAP" into edx
        mov     eax, 0xe820
        mov     [es:di + 20], dword 1   ; force a valid ACPI 3.X entry
        mov     ecx, E820_ENTRY_SIZE    ; ask for 24 bytes
        int     0x15
        jc      short .failed1          ; carry set on first call means "unsupported function"
        mov     edx, E820_SMAP          ; Some BIOSes apparently trash this register?
        cmp     eax, edx                ; on success, eax must have been reset to "SMAP"
        jne     short .failed2
        test    ebx, ebx                ; ebx = 0 implies list is only 1 entry long (worthless)
        je      short .failed3
        jmp     short .jmpin

.e820lp:
        mov     eax, 0xe820             ; eax, ecx get trashed on every int 0x15 call
        mov     [es:di + 20], dword 1   ; force a valid ACPI 3.X entry
        mov     ecx, E820_ENTRY_SIZE    ; ask for 24 bytes again
        int     0x15
        jc      short .e820f            ; carry set means "end of list already reached"
        mov     edx, E820_SMAP          ; repair potentially trashed register
.jmpin:
        jcxz    .skipent                ; skip any 0 length entries
        cmp     cl, 20                  ; got a 24 byte ACPI 3.X response?
        jbe     short .notext
        test    byte [es:di + 20], 1    ; if so: is the "ignore this data" bit clear?
        je      short .skipent
.notext:
        mov     ecx, [es:di + 8]        ; get lower dword of memory region length
        or      ecx, [es:di + 12]       ; "or" it with upper dword to test for zero
        jz      .skipent                ; if length qword is 0, skip entry
        inc     bp                      ; got a good entry: ++count, move to next storage spot
        add     di, E820_ENTRY_SIZE
.skipent:
        test    ebx, ebx                ; if ebx resets to 0, list is complete
        jne     short .e820lp

.e820f:
        mov     [mmap_ent], bp          ; store the entry count (mmap_ent is a word)
        push    bp                      ; bp is the return value; some BIOSes are reported to
                                        ; destroy bp in teletype output when the screen scrolls
        mov     ah, 0x0E                ; Teletype command
        mov     bh, 0x00                ; Page number
        mov     bl, 0x07                ; Attributes (7 == white foreground, black background)
        mov     al, [mmap_ent]          ; load the stored count, not the address of the label
        add     al, '0'                 ; turn the count into a printable character
        int     0x10
        pop     bp
        clc                             ; .e820f is reached via "jc", so clear carry last,
                                        ; after the BIOS call, to signal success
        ret

; Failure exits: each saves eax, selects its marker character, then shares
; the print-and-return tail below. eax and ebx are restored before returning.
.failed1:                               ; first call returned carry: function unsupported
        push    eax
        mov     al, 'F'
        jmp     short .report_failure
.failed2:                               ; eax was not reset to "SMAP"
        push    eax
        mov     al, 'G'
        jmp     short .report_failure
.failed3:                               ; ebx = 0 after first call: list is one entry long
        push    eax
        mov     al, 'H'
.report_failure:
        push    ebx
        mov     ah, 0x0E                ; Teletype command
        mov     bh, 0x00                ; Page number
        mov     bl, 0x07                ; Attributes (7 == white foreground, black background)
        int     0x10
        pop     ebx
        pop     eax
        stc                             ; error exit
        ret

mmap_ent dw 0                           ; entry count; a word, because do_e820 stores bp here
failmsg db 0
failmem db 'Failed', 0
;times 512-($-$$) db 0
;dw 0xAA55
EDITED:
I used nasm memext.asm -o memext.com -l memext.lst . Used MagicISO to make a bootable image file memext.iso and used Windows disk burner to burn it to a DVD/RW. Loaded Oracle VM and made a new Virtual Machine with 256 Mb RAM, CD/DVD, Hard disk of 2GB. Booted with DVD for a cold boot test, does not print anything.
Also, I open Command Console and just typed memext and it gave 'F' as output.
You will need to read the ACPI tables on a PC (or other machines that support ACPI).
Note that this will not give you the total size as one number, but give you the memory size of each region of memory - on a simple machine, that may just be two or three regions (there are holes of "not real memory" at the 0xA0000-0xFFFFF and wherever the BIOS decides to put the "PCI-hole").
I suspect it won't be entirely trivial to fit the ACPI reader into a single sector, considering some of the boot sector only has around 400 bytes of space available (although if you completely skip the partition table, I suppose you can use almost all of the 512 bytes).
As to "how to call C/C++", you will not be able to fit any meaningful C or C++ program in less than several sectors. You will need to take a look at an OS bootloader, and see how they achieve the setup for the compiler (and in many cases, you will also need special tools to produce code that is at a particular location suitable to be loaded into the memory and directly executed). This page may be of help for that (I haven't read through it all, it may even tell how much memory you have): http://www.codeproject.com/Articles/36907/How-to-develop-your-own-Boot-Loader
EDIT: my mistake, the wiki is correct, just leaving this here because...
Looks like there's a typo in the wiki - the line:
mov edx,0x0534D4150
should look like:
mov edx,0x050414D53
Notice the bytes are in reverse order (since x86 is little endian).

Porting Assembly to C/C++?

I would like to have this x86 ASM code ported to C or C++, as I don't understand ASM =)
Searching Google, it seems Relogix and IDA Pro can do it, but I don't suppose they are cheap, given that I only have this one source file.
Understanding the math or the algorithm how it works would be just as good, as I am going to use OpenGL anyway.
Are there other methods?
;
; a n d r o m e d a
;
; a 256b demo
; by insomniac/neon
;
code SEGMENT
ASSUME CS:code, DS:code
p386
LOCALS
ORG 100h
max = 4096
maxZ = 5000
b EQU byte ptr
; Program entry: fill the star arrays with ax, switch the video mode through
; INT 10h and program the VGA palette through ports 3C8h/3C9h.
; NOTE(review): relies on the register state at .COM entry (ax = 0 so that the
; fill writes zeros and "int 10h" is function 0 = set mode, es = cs, DF clear)
; - confirm for the target DOS.
Start: lea di,vars ; di -> first byte of the X/Y/Z arrays
mov ch,60 ; cx = 3Cxxh words (cl is left as it was on entry)
rep stosw ; store ax into cx words at es:di; cx = 0 afterwards
mov al,13h
int 10h ; with ah = 0: set video mode 13h
; Palette loop. cx is 0 on entry, so "loop pal" runs 65536 times and cl
; cycles through every palette index.
pal: mov al,cl
mov dx,3c8h
out dx,al ; select palette index cl
inc dx ; dx = 3C9h, palette data port
cmp al,64
jb b64_1
mov al,63 ; clamp the first component to 63
b64_1: out dx,al ; first component  = min(cl, 63)
mov al,cl
shr al,1
out dx,al ; second component = cl / 2
out dx,al ; third component  = cl / 2
loop pal
; Per-frame star update. ES is pointed at segment 8000h, which serves as the
; drawing buffer for this frame. si is a byte offset into the X/Y/Z word
; arrays and steps by 2 from max down to 2 (offset 0 is never processed).
; bp holds the pseudo-random seed and is carried across frames.
.main: push 8000h
pop ES
mov si,max
; Respawn the star if Z > maxZ*3/4 or Z <= 2, otherwise keep it.
.loop: mov ax,Z[si]
cmp ax,maxZ-(maxZ/4)
jg NewStar
cmp ax,2
jg Zok
; New star: X = current seed, seed = seed*8405h + 1,
; Y = (seed >> 6) - 400, Z = maxZ/2.
NewStar:
mov ax,bp
mov X[si],ax
imul ax,8405h
inc ax
mov bp,ax
shr ax,6
sub ax,400
mov Y[si],ax
mov Z[si],maxZ-(maxZ/2)
; Project X: dx:ax = X * 256 (dx = sign-extended high byte, ax = X << 8),
; divided by Z, then centred on column 160.
Zok: mov ax,X[si]
movsx dx,ah
shl ax,8
mov cx,Z[si]
idiv cx
add ax,320/2
; Columns outside 2..318 respawn the star.
cmp ax,320-1
jge NewStar
cmp ax,1
jle NewStar
mov di,ax
; Project Y the same way, centre on row 100 and turn it into a row offset.
; No range check is made on the row here.
mov ax,Y[si]
movsx dx,ah
shl ax,8
idiv cx
add ax,200/2
imul ax,320
add di,ax
; Plot the star as colour 127 at es:di.
mov al,127
stosb
; Move the star: ax = |X| >> 6; Y moves by ax in the direction opposite to
; the sign of X, Z grows by ax, then X += Y >> 3 (arithmetic shift).
mov ax,X[si]
cmp ax,00
jge .add
neg ax
shr ax,6
add Y[si],ax
jmp .notadd
.add: shr ax,6
sub Y[si],ax
.notadd:
add Z[si],ax
mov ax,Y[si]
sar ax,3
add X[si],ax
; Step to the previous word; the loop ends when si reaches 0.
.NextStar:
dec si
dec si
jnz .loop
; Blur pass over the drawing buffer. DS is saved and then set equal to ES
; (the buffer segment), so the loop reads and writes the same segment.
push DS
push ES
pop DS
xor di,di
; cx = 0, so "loop .blur" runs 65536 times (the whole 64 KB segment).
xor cx,cx
; Each byte becomes the sum of the bytes at di, di+1, di-320 and di+321,
; divided by 4, and decremented by one when the result is non-zero.
.blur: movzx ax,DS:[di]
movzx dx,DS:[di+1]
add ax,dx
mov dl,DS:[di-320]
add ax,dx
mov dl,DS:[di+321]
add ax,dx
shr ax,2
cmp al,0
je .skip
dec ax
.skip: stosb
loop .blur
; Copy the buffer to segment A000h: si = di, cx = 8100h words (cl is 0 after
; the loop above), source DS:si, destination ES:di.
push 0a000h
pop ES
mov si,di
mov ch,81h
rep movsw
; Restore the program's own DS.
pop DS
; Poll port 3DAh until bit 3 is set.
; NOTE(review): presumably the VGA vertical-retrace flag - confirm.
mov dx,3dah
.vrt: in al,dx
test al,8
jz .vrt
; Read a byte from port 60h; the frame loop repeats unless ax - 1 == 0.
; NOTE(review): presumably a keyboard scancode, with 1 being Esc - confirm.
in al,60h
dec ax
jnz .main
; Exit: INT 10h with al = 3, then return.
; NOTE(review): ax is 0 when falling through, so this looks like
; "set video mode 3"; ret presumably returns to DOS from the .COM - confirm.
endprog:
mov al,3
int 10h
ret
LABEL vars
X dw max DUP (?)
Y dw max DUP (?)
Z dw max DUP (?)
code ENDS
END Start
In general case, this cannot be done. In this particular case, this cannot be done because this assembly program interacts with PC BIOS directly, using int 10h to turn on 320x200 256 VGA mode, and it interacts with PC hardware directly, by writing to IO ports 0x3c8 and 0x3c9 (this sets the VGA palette) and by reading from 0x3da (VGA status register) and 0x60 (PC keyboard microcontroller). It also writes directly to VGA video memory.
Under the conditions of executing on a PC-compatible computer with MS-DOS or similar OS (or, within a simulated environment such as dosbox), this can be done: you can either make each CPU register a C variable and each assembly mnemonic a C function that modifies those variables or writes at absolute memory addresses or IO ports (with outb() and inb()), or, and I think this would be a much more interesting task, understand the math this demo does, and implement that in a portable manner.
Either way, I haven't heard of tools that could do this automatically. Existing tools might handle simple logic, but understanding a demo is never an easy task! What automatic program can tell that
imul ax,8405h
inc ax
should be replaced with ax = rand();?
You could throw it on RentaCoder and pay someone a few $ to write a C version by hand, including an annotation how it was done.
There are quite a few methods of doing so, for example this or this. ;-)