Did I convert this correctly?
Original VS C++ version:
// Fetch the current thread's TEB pointer with MSVC inline assembly:
// load the dword at fs:[0x18] into EAX, then copy EAX into the local pTeb.
_TEB *pTeb = NULL;
_asm
{
mov eax, fs:[0x18];
mov pTeb, eax;
}
My attempt (GCC):
// GCC extended-asm version: read the dword at %fs:0x18 into EAX, then store EAX
// into operand 0 (pTeb, which may be a register or memory because of "=rm").
// EAX is listed as clobbered because the template writes it explicitly.
_TEB *pTeb = NULL;
asm ("movl %%fs:0x18, %%eax\n\t"
"movl %%eax, %0"
: "=rm" (pTeb) : : "%eax");
If you need GCC syntax for Windows-related code, a good source to check is ReactOS sources. Here's their implementation of NtCurrentTeb() (with irrelevant parts removed):
/*
 * Read a 32-bit value from the FS segment at the given offset.
 *
 * Offset: offset relative to FS; the "ir" constraint lets the compiler pass it
 *         either as an immediate or in a register.
 * Returns the dword loaded by the movl instruction.
 */
unsigned long __readfsdword(const unsigned long Offset)
{
unsigned long value;
/* %a formats Offset as an address operand; %k selects the 32-bit name of the output register. */
__asm__ __volatile__("movl %%fs:%a[Offset], %k[value]" : [value] "=r" (value) : [Offset] "ir" (Offset));
return value;
}
struct _TEB * NtCurrentTeb(VOID)
{
return (PTEB)__readfsdword(0x18);
}
Related
I was trying to compile ODE(Open Dynamics Engine) physics C++ library for Android Native application on Android Studio.
When I tried to build it, the compiler reported errors saying that some of the inline assembly is invalid, because it is written for Intel (x86) processors. This code is mostly used to read the CPU clock counter for timing the physics simulation.
(Editor's note: this x86 GNU C inline asm is inefficient and not even safe or portable. See How to get the CPU cycle count in x86_64 from C++? for correct ways to use i386 / x86-64 rdtsc.)
/*
 * Read the time-stamp counter with rdtsc and store it through cc:
 * EAX is written to byte offset 0 and EDX to byte offset 4 of the buffer.
 * The two branches differ only in the width of the pointer register (ESI vs RSI).
 * NOTE(review): where unsigned long is 8 bytes both 32-bit stores land inside
 * cc[0] and cc[1] is left untouched - confirm callers expect that.
 */
static inline void getClockCount (unsigned long cc[2])
{
#ifndef X86_64_SYSTEM
/* "S" places cc in ESI; EAX/EDX are overwritten by rdtsc, memory by the two stores. */
asm volatile (
"rdtsc\n"
"movl %%eax,(%%esi)\n"
"movl %%edx,4(%%esi)\n"
: : "S" (cc) : "%eax","%edx","cc","memory");
#else
/* Same sequence, addressing the buffer through the 64-bit RSI. */
asm volatile (
"rdtsc\n"
"movl %%eax,(%%rsi)\n"
"movl %%edx,4(%%rsi)\n"
: : "S" (cc) : "%eax","%edx","cc","memory");
#endif
}
/*
 * Execute cpuid with EAX = 0 and discard the results; the instruction is run
 * only for its side effect on the instruction stream. The "memory" clobber
 * also keeps the compiler from moving memory accesses across the call.
 */
static inline void serialize()
{
#ifndef X86_64_SYSTEM
/*
 * 32-bit: EBX may be reserved by the compiler (e.g. as the PIC register), so
 * it is saved and restored by hand around cpuid instead of being clobbered.
 */
asm volatile (
"mov $0,%%eax\n"
"push %%ebx\n"
"cpuid\n"
"pop %%ebx\n"
: : : "%eax","%ecx","%edx","cc","memory");
#else
/*
 * 64-bit: declare RBX as clobbered instead of using push/pop. Pushing inside
 * inline asm writes below RSP, where the compiler may keep live locals
 * (the red zone), so the stack must not be touched here.
 */
asm volatile (
"mov $0,%%rax\n"
"cpuid\n"
: : : "%rax","%rbx","%rcx","%rdx","cc","memory");
#endif
}
/*
 * Convert the 64-bit tick count stored in `a` to a double.
 *
 * The count occupies the first eight bytes of the buffer, low half first,
 * which is the layout getClockCount() writes: when unsigned long is 4 bytes
 * that is a[0] (low half) plus a[1] (high half); when it is 8 bytes the whole
 * count is in a[0]. The value is interpreted as a signed 64-bit integer.
 *
 * Written in plain C (no x87 instructions) so it also builds on non-x86
 * targets such as ARM.
 */
static inline double loadClockCount (unsigned long a[2])
{
unsigned long long bits = a[0];
if (sizeof a[0] < sizeof bits) {
/* 32-bit unsigned long: the upper half of the count lives in a[1]. */
bits |= (unsigned long long)a[1] << 32;
}
return (double)(long long)bits;
}
I don't know how to do the same for ARM. Any help?
I have some simple code that gives me a compiler error for no apparent reason:
inline assembler syntax error in 'opcode'; found 'constant'
// Address that ws2_32_connect_hook jumps to (its final JMP) after the logging call returns.
DWORD connectFunctionAddressReturn = 0x775368F7;
// Logs the destination of an intercepted connect() call as "a.b.c.d : port".
// Called from ws2_32_connect_hook, which pushes s, name and namelen (cdecl).
//   s       - socket handle (not inspected)
//   name    - destination address; treated as an IPv4 sockaddr_in
//   namelen - size of *name in bytes
// Returns 0; the hook discards the result.
int __cdecl ws2_32_connect_hook_output(SOCKET s, const struct sockaddr *name, int namelen) {
    (void)s;
    // Nothing sensible can be printed without a full sockaddr_in.
    if (name == NULL || namelen < (int)sizeof(struct sockaddr_in)) {
        return 0;
    }
    // Read the address from the `name` parameter supplied by the hook.
    const struct sockaddr_in *in = (const struct sockaddr_in *)name;
    // sin_port is in network byte order; ntohs converts it for display.
    printf("Attempting connect %d.%d.%d.%d : %d\n", in->sin_addr.S_un.S_un_b.s_b1, in->sin_addr.S_un.S_un_b.s_b2, in->sin_addr.S_un.S_un_b.s_b3, in->sin_addr.S_un.S_un_b.s_b4, ntohs(in->sin_port));
    return 0;
}
// Naked detour stub: saves all general registers and flags, calls
// ws2_32_connect_hook_output(s, name, namelen) using cdecl right-to-left pushes
// (0x10, then [EBP+0x8], then [ESI+0x14]), removes the 12 bytes of arguments,
// restores flags and registers, and jumps to connectFunctionAddressReturn.
// CALL and JMP must not be #defined as macros in this translation unit:
// the preprocessor would replace the mnemonics below with constants and the
// inline assembler would reject them.
void __declspec(naked) ws2_32_connect_hook(void) { //ws2_32.connect = 775368F5
__asm {
PUSHAD //To be in safe environment
PUSHFD //To be safe environment
PUSH 0x10
PUSH DWORD PTR SS:[EBP+0x8]
PUSH DWORD PTR DS:[ESI+0x14]
CALL DWORD PTR ws2_32_connect_hook_output //<-- ERROR HERE
ADD ESP, 0xC //clean __cdecl,4,8,C
POPFD //Finish being in safe environment
POPAD //Finish being in safe environment
JMP connectFunctionAddressReturn // <-- ERROR HERE
}
}
Solved it: I had CALL and JMP defined as macros, so the preprocessor replaced those mnemonics inside the ASM block and the inline assembler reported an error.
// These macro names collide with the CALL and JMP mnemonics used inside __asm
// blocks; the preprocessor substitutes the constants there, producing
// "inline assembler syntax error in 'opcode'; found 'constant'".
#define CALL 0xE8
#define JMP 0xE9
I'm working on a simple real-mode OS in c++. I can't figure out how to print strings, though. The following code works when it's in the bootloader, but not when it's in the kernel.
/* Kernel entry stub: emit 16-bit code (.code16gcc), clear DS via AX, then jump to main. */
__asm__ __volatile__(".code16gcc \n");
__asm__ __volatile__ ("xor ax, ax\n");
__asm__ __volatile__ ("mov ds, ax\n");
__asm__ __volatile__("jmp main \n");
/*
 * Print a NUL-terminated string one character at a time through int 0x10.
 * For each character: AX = 0x0E00 | character (AH = 0x0E, AL = the character)
 * and BX = 0x0007.
 *
 * str: string to print; must not be null.
 */
void printf(const char* str)
{
    while(*str)
    {
        /*
         * Convert through unsigned char first: if plain char is signed, a byte
         * >= 0x80 would be sign-extended and overwrite AH when OR-ed with 0x0e00.
         */
        __asm__ __volatile__("int 0x10" : : "a"(0x0e00 | (unsigned char)*str), "b"(0x0007));
        ++str;
    }
}
/* Kernel entry point, reached through the "jmp main" in the startup stub; prints a test string. */
void main(){
printf("Hi!");
}
I'm sure it's because the ds register is set to 0, but the code is actually at 0x7E00 (that's where the bootloader puts it). I've tried setting ds to 0x7E0, which should cause it to load data correctly because 0x7E0 * 16 = 0x7E00, but it still doesn't work. It's probably just some silly mistake, but I would appreciate some help. If it matters, here is my bootloader code:
/*
 * Bootloader: issues an int 0x10 call, then an int 0x13 call with ES:BX =
 * 0000:7E00, and finally far-jumps to 0000:7E00 (where the accompanying text
 * says the kernel is placed). Every register is set by a separate asm statement.
 */
__asm__(".code16gcc \n");
void main(){
/* int 0x10 with AL = 0x02, AH = 0x00 (presumably BIOS "set video mode" - confirm against a BIOS reference). */
__asm__ __volatile__("mov al, 0x02 \n");
__asm__ __volatile__("xor ah, ah \n");
__asm__ __volatile__("int 0x10 \n");
/* ES:BX = 0000:7E00 for the int 0x13 call below. */
__asm__ __volatile__("xor ax, ax \n");
__asm__ __volatile__("mov es, ax \n");
__asm__ __volatile__("mov bx, 0x7E00 \n");
/*
 * int 0x13 with AH = 0x02, AL = 0x03, CH = 0x00, CL = 0x02, DH = 0x00
 * (presumably BIOS "read sectors": 3 sectors from cylinder 0, head 0,
 * sector 2 - confirm). DL is never set in this function.
 */
__asm__ __volatile__("mov al, 0x03 \n");
__asm__ __volatile__("mov ch, 0x00 \n");
__asm__ __volatile__("mov cl, 0x02 \n");
__asm__ __volatile__("mov dh, 0x00 \n");
__asm__ __volatile__("mov ah, 0x02 \n");
__asm__ __volatile__("int 0x13 \n");
/* Transfer control to 0000:7E00. */
__asm__ __volatile__("jmp 0:0x7E00 \n");
}
I ran into some weird behavior while working on a SIMD color lerp function, and I trimmed it down into a minimal program. The SIMD code in this example no longer performs a lerp but it performs unpacking from a 32-bit color to an XMM register and then back to 32-bit.
In MSVC++ 2015 (Update 3), in Release x64 mode, the following code does not produce the correct result, but in Debug x64 or Release/Debug x86 it works correctly. This is the only code in an otherwise empty Win32 C++ console application project:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "emmintrin.h"
// RGBA color with one byte per channel, stored in the order red, green, blue, alpha.
struct Color4
{
uint8_t red;
uint8_t green;
uint8_t blue;
uint8_t alpha;
// Build a color from individual channels; alpha defaults to 255.
Color4(uint8_t red, uint8_t green, uint8_t blue, uint8_t alpha = 255)
: red(red), green(green), blue(blue), alpha(alpha) {}
// Decode a packed 32-bit value: red is bits 0-7, green 8-15, blue 16-23, alpha 24-31.
explicit Color4(uint32_t rgba)
{
red = (uint8_t)(rgba & 0xFF);
green = (uint8_t)((rgba >> 8)&0xFF);
blue = (uint8_t)((rgba >> 16) & 0xFF);
alpha = (uint8_t)((rgba >> 24) & 0xFF);
}
};
// Round-trips a color through SSE2: widens the four channel bytes to 32-bit
// lanes, converts them to float and back to integers, then narrows them to
// bytes again. The caller expects the result to equal the input
// (32 64 128 255 in the example below).
Color4 PackUnpack(Color4 col)
{
uint32_t tmp;
// Copy the four channel bytes of col into one 32-bit integer.
memcpy(&tmp, &col, sizeof(tmp));
// Broadcast tmp to all lanes, interleave with zero bytes (8 -> 16 bit) and
// zero words (16 -> 32 bit), then convert the four 32-bit integers to floats.
__m128 aFloat = _mm_cvtepi32_ps(
_mm_unpacklo_epi16(
_mm_unpacklo_epi8(
_mm_set1_epi32(tmp),
_mm_setzero_si128()
),
_mm_setzero_si128()
)
);
// Convert back to 32-bit integers, then pack to 16-bit (signed saturation)
// and to 8-bit (unsigned saturation); the upper lanes come from zero vectors.
__m128i ret = _mm_packus_epi16(
_mm_packs_epi32(
_mm_cvtps_epi32(aFloat),
_mm_setzero_si128()
),
_mm_setzero_si128()
);
// The low 32 bits of ret hold the four packed channel bytes.
return Color4((uint32_t)_mm_cvtsi128_si32(ret));
}
// Prints the build flavour (based on _DEBUG) and the channels of a color
// after a PackUnpack round trip, so the outputs of different configurations
// can be compared.
int main()
{
#ifdef _DEBUG
printf("DEBUG\n");
#else
printf("RELEASE\n");
#endif
Color4 c = PackUnpack(Color4(32, 64, 128, 255));
// Debug x64 or Debug/Release x86: Prints "32 64 128 255"
// Release x64: Prints "255 0 0 0"
printf("%d %d %d %d\n", c.red, c.green, c.blue, c.alpha);
return 0;
}
The Release x64 output is:
RELEASE
255 0 0 0
Debug x64 and all x86 output is:
DEBUG
32 64 128 255
The disassembly looks like it's messed up pre-computing a constant value to load into an XMM register to skip the _mm_set1_epi32 (see first movdqa instruction.)
main:
00007FF674391070 sub rsp,38h
00007FF674391074 lea rcx,[string "RELEASE\n" (07FF674392200h)]
00007FF67439107B call printf (07FF674391010h)
00007FF674391080 movdqa xmm0,xmmword ptr [__xmm#000000ff000000ff000000ff000000ff (07FF674392220h)]
00007FF674391088 lea rcx,[string "%d %d %d %d\n" (07FF674392210h)]
00007FF67439108F xorps xmm2,xmm2
00007FF674391092 mov dword ptr [rsp+40h],0FF804020h
00007FF67439109A punpcklbw xmm0,xmm2
00007FF67439109E punpcklwd xmm0,xmm2
00007FF6743910A2 cvtdq2ps xmm0,xmm0
00007FF6743910A5 cvtps2dq xmm1,xmm0
00007FF6743910A9 packssdw xmm1,xmm2
00007FF6743910AD packuswb xmm1,xmm2
00007FF6743910B1 movd r10d,xmm1
00007FF6743910B6 mov edx,r10d
00007FF6743910B9 mov r8d,r10d
00007FF6743910BC shr edx,10h
00007FF6743910BF mov eax,r10d
00007FF6743910C2 shr r8d,8
00007FF6743910C6 movzx r9d,dl
00007FF6743910CA shr eax,18h
00007FF6743910CD movzx edx,r10b
00007FF6743910D1 movzx r8d,r8b
00007FF6743910D5 mov dword ptr [rsp+20h],eax
00007FF6743910D9 call printf (07FF674391010h)
00007FF6743910DE xor eax,eax
00007FF6743910E0 add rsp,38h
00007FF6743910E4 ret
I have tried this with g++ 4.8.4 on Ubuntu 14.04 x64 and it works fine with -O3 on or off.
So my question is, is this a compiler bug, the result of using undefined/implementation defined behavior, or a more mundane bug in my code?
(The code used to use type punning via unions to get the uint32_t value out of the Color4, which I replaced with a memcpy because that's not standard... still no dice.)
Not actually an answer, but since I don't like to put too much text into a comment, this is the smallest code I could reproduce the issue with:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "emmintrin.h"
// Minimal reproduction: copy four bytes into a uint32_t, broadcast it with
// _mm_set1_epi32, and print both the scalar and lane 0 of the vector so the
// two values can be compared.
int main()
{
uint8_t src[4] = { 32, 64, 128, 255 };
uint32_t tmp = 0;
memcpy( &tmp, &src, sizeof( tmp ) );
auto a = _mm_set1_epi32( tmp );
printf( "tmp = 0x%08x\n", tmp );
// m128i_i32 is the member used here to read the 32-bit lanes of the vector.
printf( "a.m128i_i32[0] = 0x%08x\n", a.m128i_i32[0] );
return 0;
}
Expected output:
tmp = 0xff804020
a.m128i_i32[0] = 0xff804020
Output with Release x64:
tmp = 0xff804020
a.m128i_i32[0] = 0x000000ff
This is due to a compiler bug. A workaround is to use
// red is the lowest byte and alpha the highest, matching what the memcpy yields on little-endian x86; the 256u factors keep the arithmetic unsigned so it cannot overflow.
tmp = col.red + 256u * (col.green + 256u * (col.blue + 256u * col.alpha));
in place of the memcpy or type punning.
I am compiling a piece of asm code for android:
/*
 * Run cpuid for leaf `function` and return EAX, EBX, ECX, EDX through
 * *a, *b, *c, *d. The "0" input constraint ties `function` to operand 0 (EAX).
 * NOTE: the "=b" output needs EBX; this is the asm statement the x86 build
 * reported below rejects with "inconsistent operand constraints in an 'asm'".
 */
static void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
__asm__ __volatile__ (
"cpuid"
: "=a" (*a) ,
"=b" (*b) ,
"=c" (*c) ,
"=d" (*d)
: "0" (function)) ;
}
The APP_ABI is set to 'all':
APP_ABI := all
The compilation fails when it gets to x86:
$ ndk-build
[armeabi-v7a] Gdbserver : [arm-linux-androideabi-4.6] libs/armeabi-v7a/gdbserver
[armeabi-v7a] Gdbsetup : libs/armeabi-v7a/gdb.setup
[armeabi] Gdbserver : [arm-linux-androideabi-4.6] libs/armeabi/gdbserver
[armeabi] Gdbsetup : libs/armeabi/gdb.setup
[x86] Gdbserver : [x86-4.6] libs/x86/gdbserver
[x86] Gdbsetup : libs/x86/gdb.setup
[mips] Gdbserver : [mipsel-linux-android-4.6] libs/mips/gdbserver
[mips] Gdbsetup : libs/mips/gdb.setup
[armeabi-v7a] Compile thumb : hello-jni <= CpuArch.c
[armeabi-v7a] SharedLibrary : libhello-jni.so
[armeabi-v7a] Install : libhello-jni.so => libs/armeabi-v7a/libhello-jni.so
[armeabi] Compile thumb : hello-jni <= CpuArch.c
[armeabi] SharedLibrary : libhello-jni.so
[armeabi] Install : libhello-jni.so => libs/armeabi/libhello-jni.so
[x86] Compile : hello-jni <= CpuArch.c
D:/adt/ndk/samples/hello-jni/jni/CpuArch.c: In function 'MyCPUID':
D:/adt/ndk/samples/hello-jni/jni/CpuArch.c:75:3: error: inconsistent operand constraints in an 'asm'
/cygdrive/d/adt/ndk/build/core/build-binary.mk:391: recipe for target '/cygdrive/d/adt/ndk/samples/hello-jni/obj/local/x86/objs-debug/hello-jni/CpuArch.o' failed
make: *** [/cygdrive/d/adt/ndk/samples/hello-jni/obj/local/x86/objs-debug/hello-jni/CpuArch.o] Error 1
I don't have much experience with asm, and the error message isn't enough for me to find a solution. :(
BTW, the build is run on Windows 7 using Cygwin.
Full version:
/*
 * Execute cpuid for leaf `function` and store EAX, EBX, ECX, EDX into
 * *a, *b, *c, *d. One of three implementations is selected at compile time:
 *   USE_ASM and _MSC_VER defined : MSVC inline assembly
 *   USE_ASM defined, no _MSC_VER : GCC-style extended asm
 *   USE_ASM not defined          : the __cpuid(CPUInfo, function) intrinsic
 */
static void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
#ifdef _MSC_VER
/* Results are captured in locals first, then copied through the pointers. */
UInt32 a2, b2, c2, d2;
/* EBX, ECX and EDX are zeroed before EAX is loaded with the leaf number. */
__asm xor EBX, EBX;
__asm xor ECX, ECX;
__asm xor EDX, EDX;
__asm mov EAX, function;
__asm cpuid;
__asm mov a2, EAX;
__asm mov b2, EBX;
__asm mov c2, ECX;
__asm mov d2, EDX;
*a = a2;
*b = b2;
*c = c2;
*d = d2;
#else
/* "0" ties `function` to operand 0 (EAX); "=b" requires EBX as an output register. */
__asm__ __volatile__ (
"cpuid"
: "=a" (*a) ,
"=b" (*b) ,
"=c" (*c) ,
"=d" (*d)
: "0" (function)) ;
#endif
#else
/* CPUInfo[0..3] are copied to *a, *b, *c, *d in that order. */
int CPUInfo[4];
__cpuid(CPUInfo, function);
*a = CPUInfo[0];
*b = CPUInfo[1];
*c = CPUInfo[2];
*d = CPUInfo[3];
#endif
}
This code is based on something I wrote in this Stackoverflow answer. One has to be careful to preserve the %ebx register on some x86-based architectures/ABIs. %ebx is used to relocate code (shared objects etc.) when position-independent code (the -fPIC gcc option) is being generated. The code below avoids using =b in the extended assembler output and instead uses a register the compiler knows is free and usable. %ebx is preserved by swapping it with the free register before and after the call to cpuid. I've also fixed a small gotcha related to the %ecx register: I clear it to 0 ("c"(0)), since on some architectures failing to do so will result in stale values being returned by cpuid.
/*
 * Execute cpuid for leaf `function` with ECX = 0 and store EAX, EBX, ECX, EDX
 * into *a, *b, *c, *d, without naming EBX/RBX in the operand constraints.
 *
 * Operand 1 (*b) is an early-clobber register chosen by the compiler ("=&r").
 * It is exchanged with EBX/RBX before cpuid and exchanged back afterwards, so
 * EBX/RBX ends up holding its original value and operand 1 holds the EBX
 * result of cpuid. Any other architecture is rejected with #error.
 */
static void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#if defined(__i386__)
/* %k1 is the 32-bit name of operand 1, matching the 32-bit xchgl. */
__asm__ __volatile__ (
"xchgl\t%%ebx, %k1\n\t" \
"cpuid\n\t" \
"xchgl\t%%ebx, %k1\n\t"
: "=a"(*a), "=&r"(*b), "=c"(*c), "=d"(*d)
: "a"(function), "c"(0));
#elif defined(__x86_64__)
/* %q1 is the 64-bit name of operand 1, so the whole of RBX is swapped and restored. */
__asm__ __volatile__ (
"xchgq\t%%rbx, %q1\n\t" \
"cpuid\n\t" \
"xchgq\t%%rbx, %q1\n\t"
: "=a"(*a), "=&r"(*b), "=c"(*c), "=d"(*d)
: "a"(function), "c"(0));
#else
#error "Unknown architecture."
#endif
}