Segmentation fault in NASM 64bit - c++

I am trying to output the result to the user after getting 3 inputs from scanf.
When I run my code, I am able to get the input I need. However it crashes after I collect the input and begin the calculation.
By the way, I am using Ubuntu 14.04 with g++ and NASM 64bit.
Here's how it should look:
This program is brought to you by Chris Tarazi
Welcome to Areas of Trapezoids
Please enter one of the base numbers: 5.8
Please enter the other base number: 2.2
Please enter the height: 6.5
****//Crashes here with Segmentation fault (core dumped)****
The area of a trapezoid with sizes 5.799999999999999365, 2.200000000000000153,
and 6.500000000000000000 is 26.000000000000000328
Have a nice day. Enjoy your trapezoids.
C++ file:
#include <stdio.h>
#include <stdint.h>
extern "C" double ComputeArea(); // links with global in assembly
using namespace std;
int main()
{
    // Greeting is printed before control is handed to the assembly routine.
    printf("This program is brought to you by Chris Tarazi.\n");

    // ComputeArea is the extern "C" routine declared above; its result is
    // captured here but not used any further by this program.
    const double area = ComputeArea();
    static_cast<void>(area);

    printf("Have a nice day. Enjoy your trapezoids.\n");
    return 0;
}
Assembly file:
extern printf ; This function will be linked later.
extern scanf
global ComputeArea ; Declare function global to link with "extern" from C++.
;---------------------------------Declare variables-------------------------------------------
segment .data
welcome: db "Welcome to the area of trapezoids.", 10, 0
input: db "Please enter one of the base numbers: ", 0
secInput: db "Please enter the other base number: ", 0
output: db "The area of a trapezoid with sizes %1.18lf, %1.18lf, and %1.18lf is %1.18lf .", 10, 0
hInput: db "Please enter the height: ", 0
inputformat: db "%lf", 0
stringformat: db "%s", 0
fourfloatformat: db "%1.18lf %1.18lf %1.18lf %1.18lf", 0
;---------------------------------Begin segment of executable code------------------------------
segment .text
;-----------------------------------------------------------------------
; double ComputeArea(void)
;
; Prompts for the two bases and the height of a trapezoid, prints the
; area ((a + b) / 2) * h together with the inputs, and returns the area.
;
; ABI:   System V AMD64 (Linux)
; In:    nothing
; Out:   xmm0 = area
; Clobb: rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15, flags
;        (everything printf/scanf may clobber; no callee-saved register
;        other than rbp is touched, so nothing else needs saving)
; Stack: 32 bytes of locals below rbp.  On entry rsp % 16 == 8; after
;        "push rbp" and "sub rsp, 32" rsp is 16-byte aligned and stays
;        that way at every call.
;
; The inputs live in memory because xmm registers are caller-saved:
; values left in xmm0/xmm1 would not survive the next printf/scanf call.
;-----------------------------------------------------------------------
%define BASE_A  rbp - 8                 ; first base   (double)
%define BASE_B  rbp - 16                ; second base  (double)
%define HEIGHT  rbp - 24                ; height       (double)
%define AREA    rbp - 32                ; computed area (double)

ComputeArea:
        push    rbp
        mov     rbp, rsp
        sub     rsp, 32                 ; room for the four doubles above

        xor     eax, eax                ; start every input at 0.0 so a failed
        mov     [BASE_A], rax           ; scanf leaves a defined value behind
        mov     [BASE_B], rax
        mov     [HEIGHT], rax

;---------------------------------Output messages to user---------------------------------------
        lea     rdi, [rel stringformat]
        lea     rsi, [rel welcome]
        xor     eax, eax                ; al = 0: no vector registers passed
        call    printf

        lea     rdi, [rel stringformat]
        lea     rsi, [rel input]
        xor     eax, eax
        call    printf

        lea     rdi, [rel inputformat]
        lea     rsi, [BASE_A]           ; scanf stores the first base here
        xor     eax, eax
        call    scanf

        lea     rdi, [rel stringformat]
        lea     rsi, [rel secInput]
        xor     eax, eax
        call    printf

        lea     rdi, [rel inputformat]
        lea     rsi, [BASE_B]           ; scanf stores the second base here
        xor     eax, eax
        call    scanf

        lea     rdi, [rel stringformat]
        lea     rsi, [rel hInput]
        xor     eax, eax
        call    printf

        lea     rdi, [rel inputformat]
        lea     rsi, [HEIGHT]           ; scanf stores the height here
        xor     eax, eax
        call    scanf

;---------------------------------Begin ComputeArea Calculation-----------------------------------
        movsd   xmm0, [BASE_A]
        addsd   xmm0, [BASE_B]          ; xmm0 = a + b
        mov     eax, 2
        cvtsi2sd xmm1, eax              ; xmm1 = 2.0
        divsd   xmm0, xmm1              ; xmm0 = (a + b) / 2
        mulsd   xmm0, [HEIGHT]          ; xmm0 = ((a + b) / 2) * h
        movsd   [AREA], xmm0            ; keep it: printf clobbers xmm0

;---------------------------------Output result to user-------------------------------------------
        lea     rdi, [rel output]
        movsd   xmm0, [BASE_A]
        movsd   xmm1, [BASE_B]
        movsd   xmm2, [HEIGHT]
        movsd   xmm3, [AREA]
        mov     eax, 4                  ; al = 4: four doubles passed in xmm0-xmm3
        call    printf

;---------------------------------Return the area to the caller-----------------------------------
        movsd   xmm0, [AREA]            ; return value
        leave                           ; undo "sub rsp" and "push rbp"
        ret

First off, why on earth are you saving ALL of those registers?!? The ABI for 64-bit Linux says you only need to save rbx, rbp, and r12 - r15, and only if you use those registers in your function. Also, since you are writing in assembly, there is no need to create a stack frame in 64-bit land (plus you aren't even using rbp, so why create a stack frame?). The only thing that is very important is to make sure your stack is aligned on a 16-byte boundary - call pushes an 8-byte return address, so all you need in your ComputeArea function is sub rsp, 8 at the start and add rsp, 8 right before your ret.
In your first scanf you are using rsp without adjusting it, you just overwrote something!
You do some computations here:
mov rax, 2
cvtsi2sd xmm3, rax
addsd xmm0, xmm1
divsd xmm0, xmm3
mulsd xmm0, xmm2
ret
You return from the procedure here but do not pop all of those registers you just pushed!! So basically your stack pointer is all messed up! The CPU does not know what the return address is!
What you do in the prologue, must be reversed in the epilogue before you return!
Maybe, you should start simple, read in 3 floats and try to print them!
When I correct your code, this is my output:
Welcome to the area of trapezoids.
Please enter one of the base numbers: 5.8
Please enter the other base number: 2.2
Please enter the height: 6.5
The area of a trapezoid with sizes 5.799999999999999822, 2.200000000000000178, and 6.500000000000000000 is 26.000000000000000000 .

Related

How to add my custom c++ logic to output some register info in the middle of a fuction?

I have used IDA and x64dbg to find some logic I care about. The development environment is Windows x64 with Visual Studio 2017. The register value I care about is in the middle of a function (at 000000006137C034), so I cannot simply use MinHook to hook sub_xxx and then output the information I have analyzed from the detour function.
Disassemble code in a dll I want to analyze
.text:000000006137BFE0 ; __int64 __fastcall sub_6137BFE0(__int64, __int64, __int64, int)
.text:000000006137BFE0 sub_6137BFE0 proc near ; DATA XREF: .rdata:000000006286BB78↓o
.text:000000006137BFE0 ; .pdata:00000000670C06BC↓o
.text:000000006137BFE0
.text:000000006137BFE0 var_28 = dword ptr -28h
.text:000000006137BFE0 var_20 = dword ptr -20h
.text:000000006137BFE0 var_18 = dword ptr -18h
.text:000000006137BFE0 arg_0 = qword ptr 8
.text:000000006137BFE0 arg_8 = qword ptr 10h
.text:000000006137BFE0 arg_10 = qword ptr 18h
.text:000000006137BFE0
.text:000000006137BFE0 mov [rsp+arg_0], rbx
.text:000000006137BFE5 mov [rsp+arg_8], rbp
.text:000000006137BFEA mov [rsp+arg_10], rsi
.text:000000006137BFEF push rdi
.text:000000006137BFF0 sub rsp, 40h
.text:000000006137BFF4 mov rbp, rdx
.text:000000006137BFF7 mov [rsp+48h+var_18], 1
.text:000000006137BFFF mov rdx, [rcx+8]
.text:000000006137C003 mov edi, r9d
.text:000000006137C006 mov rsi, r8
.text:000000006137C009 mov [rsp+48h+var_20], 8
.text:000000006137C011 mov ecx, 0A0h
.text:000000006137C016 mov [rsp+48h+var_28], 4E2h
.text:000000006137C01E mov r9d, 0FF12FD5Ah
.text:000000006137C024 xor r8d, r8d
.text:000000006137C027 call global_new_handler
.text:000000006137C02C mov rbx, rax
.text:000000006137C02F test rax, rax
.text:000000006137C032 jz short loc_6137C05F
.text:000000006137C034 mov rcx, rax
{
#here I want to insert logic to output some memory info pointed by register.
#After doing this , the process continue to excute.
# add code : jump to addr of my own code
}
.text:000000006137C037 call sub_61367440
.text:000000006137C03C mov qword ptr [rbx+98h], 0
.text:000000006137C047 lea rax, off_6286B8A8
.text:000000006137C04E mov [rbx], rax
.text:000000006137C051 mov rax, rbx
.text:000000006137C054 mov [rbx+38h], rbp
.text:000000006137C058 mov [rbx+40h], rsi
.text:000000006137C05C mov [rbx+48h], edi
.text:000000006137C05F
.text:000000006137C05F loc_6137C05F: ; CODE XREF: sub_6137BFE0+52↑j
.text:000000006137C05F mov rbx, [rsp+48h+arg_0]
.text:000000006137C064 mov rbp, [rsp+48h+arg_8]
.text:000000006137C069 mov rsi, [rsp+48h+arg_10]
.text:000000006137C06E add rsp, 40h
.text:000000006137C072 pop rdi
.text:000000006137C073 retn
.text:000000006137C073 sub_6137BFE0 endp
.text:000000006137C073
I found these papers: The-Beginners-Guide-to-Codecaves and Easy-Mid-Hook. A code cave is similar to what I want: it lets me add my custom logic, but the examples are for a 32-bit project. I need to modify some code at 000000006137C034 so that it jumps to my own code and, after some work, the process continues to execute.
My own code is written in C++ in another project, which loads the DLL:
# save all registers, which maybe written by asm
# monitor logic , which written by c++
# restore all registers
# return to 000000006137C037 , so the process continue to excute
There are several technical points I am unsure about:
a. How to modify the code segment to add a jump instruction while the process is running, so that the process can jump to my own code;
b. How to implement my own code: saving and restoring the stack and registers, and how to deal with the return address;
c. Anything else I need to pay attention to, given my limited knowledge of x64 instructions and reverse engineering.
I hope someone can give me some hints (a paper, an open-source library, or a web page) so that I can keep learning and finally implement the functionality I need.
Thanks!
I have found some pages that help me understand, and I will keep updating this list with what I find while implementing the functionality I want.
1.Hooking-By-Example
2.Adding x86, x64 Assembly Language code to the Visual Studio C++ project.
3.What is the 'shadow space' in x64 assembly?

I have an x86-64 program that only works properly when run from the gdb debugger

I have written a primitive version of malloc in x86 assembler as an exercise. The code uses a linked list to keep track of allocated memory blocks. I decided to add a function to walk the list and print out the metadata for each block and encountered this weird problem. When I run the code using gdb it works properly, but when run directly without gdb it does not. When I print out an address returned by sbrk as a hex string, it only prints correctly if run from gdb. If run repeatedly without gdb, it prints a different number each run. I have cut the code down to the minimum needed to illustrate the problem. I have tried everything I can think of to find the problem. I'm sure that my itoh and printstring functions are working correctly. I have tried linking with the C library and using puts, but it does the same. I tried initializing all registers to zero. I have looked for any registers altered by the call to sbrk and saved and restored them across the call. Nothing has worked. Here is the code that illustrates the problem:
global _start,itoh,printstring
section .rodata
TRUE equ 1
FALSE equ 0
NULL equ 0
LF equ 10
sys_brk equ 12
exit_ok equ 0
sys_exit equ 60
sys_write equ 1
stdout equ 1
section .data
current_brk dq 1
linefeed db LF, NULL
msg1 db 'Test should print 0x403000 from constant: ', NULL
msg2 db 'Test should print 0x403000 from sys_brk return: ', NULL
number db '--------------------', NULL
section .text
;
; _start - print 0x403000 formatted from a constant, then print the
;          current program break as reported by the brk system call
; ABI: Linux x86-64 system calls (rax = number, rdi = first argument;
;      the syscall instruction overwrites rcx and r11)
;
_start: mov     rdi, msg1
        call    printstring
        mov     rdi, 0x403000
        mov     rsi, number
        mov     rdx, TRUE
        call    itoh
        mov     rdi, number
        call    printstring

        ; Ask for the current break explicitly with brk(0).  Without this,
        ; rdi still holds whatever printstring left in it (stdout = 1), and
        ; the query only works because the kernel rejects that address and
        ; returns the current break.
        ; NOTE: the value is not a fixed 0x403000.  With address-space layout
        ; randomization the kernel places the initial break at a random
        ; offset; gdb disables randomization by default, which is why the
        ; address only matches the constant when run under gdb.
        xor     edi, edi
        mov     eax, sys_brk
        syscall
        mov     [current_brk], rax

        mov     rdi, msg2
        call    printstring
        mov     rdi, [current_brk]
        mov     rsi, number
        mov     rdx, TRUE
        call    itoh
        mov     rdi, number
        call    printstring

.exit:  mov     eax, sys_exit
        mov     edi, exit_ok
        syscall
;
; itoh - format an unsigned 64-bit integer as "0x" followed by lowercase
;        hexadecimal digits (no leading zeros)
;      - rdi integer to convert
;      - rsi address of the string buffer that receives the result
;      - rdx if true, a newline is appended to the string
; return nothing
; rax and rcx are preserved; rdx, rsi, r9, r10 and r11 are overwritten
itoh:   push    rax
        mov     r9, rdx                 ; r9 = newline flag (rdx is reused below)
        mov     rax, rdi                ; rax = value still to be converted
        xor     r10, r10                ; r10 = number of digits on the stack
.split: mov     rdx, rax
        and     rdx, 15                 ; rdx = lowest hex digit (value mod 16)
        shr     rax, 4                  ; rax = value / 16
        push    rdx                     ; digits come out least significant first,
        inc     r10                     ; so stack them and pop in reverse
        test    rax, rax
        jnz     .split                  ; more digits while the quotient is non-zero
        mov     byte [rsi], "0"         ; prefix
        mov     byte [rsi + 1], "x"
        add     rsi, 2
.emit:  pop     r11                     ; next digit, most significant first
        add     r11, "0"                ; assume 0-9 ...
        cmp     r11, "9"
        jbe     .store
        add     r11, "a" - "0" - 10     ; ... otherwise shift into a-f
.store: mov     byte [rsi], r11b
        inc     rsi
        dec     r10
        jnz     .emit                   ; until every stacked digit is written
        test    r9, r9
        jz      .terminate
        mov     byte [rsi], LF          ; optional trailing newline
        inc     rsi
.terminate:
        mov     byte [rsi], NULL        ; terminate string
        pop     rax
        ret
;
; printstring - write a NULL-terminated string to stdout
;             - rdi is address of string
; return nothing
; rax and rcx are saved and restored around the system call;
; rdx, rsi, rdi (and r11, via syscall) are overwritten
printstring:
        push    rcx                     ; syscall overwrites rcx
        push    rax                     ; syscall returns its result in rax
        mov     rsi, rdi                ; rsi = start of string (also the write buffer)
        xor     edx, edx                ; rdx = length counted so far
.measure:
        cmp     byte [rsi + rdx], NULL  ; reached the terminator?
        je      .measured
        inc     rdx
        jmp     .measure
.measured:
        test    rdx, rdx                ; empty string: skip the system call
        jz      .finished
        mov     eax, sys_write          ; write(stdout, rsi, rdx)
        mov     edi, stdout
        syscall
.finished:
        pop     rax
        pop     rcx
        ret
yasm -felf64 -gdwarf2 test.asm
ld -g -otest test.o
gdb test
Type "apropos word" to search for commands related to "word"...
Reading symbols from test...
(gdb) run
Starting program: /home/david/asm/test
Test should print 0x403000 from constant: 0x403000
Test should print 0x403000 from sys_brk return: 0x403000
[Inferior 1 (process 28325) exited normally]
(gdb) q
./test
Test should print 0x403000 from constant: 0x403000
Test should print 0x403000 from sys_brk return: 0x14cf000

Get device encryption support information

I want to detect the device encryption support in my program. This info is available in the System Information program. Please check out the screenshot below:
What kind of Win API functions are used/available to detect the device encryption support? What System Information program uses to detect it? I just need some information.
TL;DR: it uses undocumented functions from fveapi.dll (Windows Bitlocker Drive Encryption API). It seems to rely only on the TPM capabilities.
Note that I only spent about 15 minutes on it. I doubt I missed something crucial, although that is possible.
A bit of Reverse engineering
Typed system information in search bar, saw it spawned msinfo32.exe. Put the binary in a disassembler. It uses a MUI file so I'll have to search for the strings in the MUI file and not the executable.
Searching Device Encryption Support leads to string ID 951 (0x3b7)
STRINGTABLE
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
{
951, "Device Encryption Support|%s"
Searching for the constant in the disassembler leads to a function named:
DeviceEncryptionDataPoints(struct CWMIHelper *, struct CPtrList *)
The loading of the aforementioned string is almost right at the start:
.text:00000001400141E9 mov edx, 3B7h
.text:00000001400141EE lea rcx, [rsp+2C8h+var_280]
.text:00000001400141F3
.text:00000001400141F3 loc_1400141F3:
.text:00000001400141F3 ; try {
.text:00000001400141F3 call cs:__imp_?LoadStringW#CString##QEAAHI#Z ; CString::LoadStringW(uint)
So we are definitely in the right function.
It loads the fveapi.dll module:
.text:0000000140014269 xor edx, edx ; hFile
.text:000000014001426B mov r8d, 800h ; dwFlags
.text:0000000140014271 lea rcx, LibFileName ; "fveapi.dll"
.text:0000000140014278 call cs:__imp_LoadLibraryExW
Gets a pointer on FveQueryDeviceEncryptionSupport:
.text:00000001400142AB lea rdx, aFvequerydevice ; "FveQueryDeviceEncryptionSupport"
.text:00000001400142B2 mov rcx, rdi ; hModule
.text:00000001400142B5 call cs:__imp_GetProcAddress
And immediately calls the function (this is a protected CFG call, but it's here):
.text:00000001400142CA mov [rsp+2C8h+var_254], rbx
.text:00000001400142CF mov [rsp+2C8h+var_260], 14h
.text:00000001400142D7 mov [rsp+2C8h+var_25C], 1
.text:00000001400142DF mov [rsp+2C8h+var_258], 1
.text:00000001400142E7 lea rcx, [rsp+2C8h+var_260]
.text:00000001400142EC call cs:__guard_dispatch_icall_fptr
Return value
If the function fails:
.text:00000001400142EC call cs:__guard_dispatch_icall_fptr
.text:00000001400142F2 mov esi, eax
.text:00000001400142F4 test eax, eax
.text:00000001400142F6 js loc_1400143F0 ; check failure
We land here:
.text:00000001400143F7 mov edx, 2FFh
.text:00000001400143FC lea rcx, [rsp+2C8h+var_288]
.text:0000000140014401 call cs:__imp_?LoadStringW#CString##QEAAHI#Z ; CString::LoadStringW(uint)
The string 0x2FF (767) is:
767, "Elevation Required to View"
If the call succeeds, the code checks one of the parameters, which is definitely an out parameter:
.text:00000001400142EC call cs:__guard_dispatch_icall_fptr
.text:00000001400142F2 mov esi, eax
.text:00000001400142F4 test eax, eax
.text:00000001400142F6 js loc_1400143F0
.text:00000001400142FC cmp dword ptr [rsp+2C8h+var_254], ebx ; rbx = 0
.text:0000000140014300 jnz short loc_14001431D
.text:0000000140014302 mov edx, 3B8h
.text:0000000140014307 lea rcx, [rsp+2C8h+var_288]
.text:000000014001430C call cs:__imp_?LoadStringW#CString##QEAAHI#Z ; CString::LoadStringW(uint)
If it's 0, the string 0x3b8 (952) is used:
952, "Meets prerequisites"
Otherwise various failure functions are called.
Failure
In case of a failure, the UpdateDeviceEncryptionStateFailureString function is called:
.text:0000000140014325 lea r9, [rsp+2C8h+var_294] ; int *
.text:000000014001432A lea r8, [rsp+2C8h+var_290] ; int *
.text:000000014001432F mov edx, 3C1h ; unsigned int
.text:0000000140014334 lea rcx, [rsp+2C8h+var_288] ; struct CString *
.text:0000000140014339 call ?UpdateDeviceEncryptionStateFailureString##YAXPEAVCString##IPEAH1#Z ; UpdateDeviceEncryptionStateFailureString(CString *,uint,int *,int *)
Its main goal is to fetch some string from the resource file.
One that stands out is 0x3b9:
.text:0000000140013A37 mov edx, 3B9h
.text:0000000140013A3C mov rcx, rbx
.text:0000000140013A3F call cs:__imp_?LoadStringW#CString##QEAAHI#Z ; CString::LoadStringW(uint)
953, "Reasons for failed automatic device encryption"
Which is the case for me since I don't have a TPM.
Other Functions
All of the other functions called from the DeviceEncryptionDataPoints (at least to get the needed results) are all from the fveapi.dll.
There are a lot in a function called PerformIndividualHardwareTests(HINSTANCE hModule, struct CString *, int *, int *):
.text:0000000140013AEF lea rdx, aNgscbcheckisao ; "NgscbCheckIsAOACDevice"
.text:0000000140013AF6 mov [rbp+var_1F], 0
.text:0000000140013AFA mov rdi, r9
.text:0000000140013AFD mov [rbp+var_20], 0
.text:0000000140013B01 mov rsi, r8
.text:0000000140013B04 mov [rbp+var_1E], 0
.text:0000000140013B08 mov rbx, rcx
.text:0000000140013B0B call cs:__imp_GetProcAddress
.text:0000000140013B12 nop dword ptr [rax+rax+00h]
.text:0000000140013B17 mov r12, rax
.text:0000000140013B1A test rax, rax
.text:0000000140013B1D jz loc_140013CB9
.text:0000000140013B23 lea rdx, aNgscbcheckishs ; "NgscbCheckIsHSTIVerified"
.text:0000000140013B2A mov rcx, rbx ; hModule
.text:0000000140013B2D call cs:__imp_GetProcAddress
.text:0000000140013B34 nop dword ptr [rax+rax+00h]
.text:0000000140013B39 mov r15, rax
.text:0000000140013B3C test rax, rax
.text:0000000140013B3F jz loc_140013CB9
.text:0000000140013B45 lea rdx, aNgscbcheckhsti ; "NgscbCheckHSTIPrerequisitesVerified"
.text:0000000140013B4C mov rcx, rbx ; hModule
.text:0000000140013B4F call cs:__imp_GetProcAddress
.text:0000000140013B56 nop dword ptr [rax+rax+00h]
.text:0000000140013B5B mov r13, rax
.text:0000000140013B5E test rax, rax
.text:0000000140013B61 jz loc_140013CB9
.text:0000000140013B67 lea rdx, aNgscbcheckdmas ; "NgscbCheckDmaSecurity"
.text:0000000140013B6E mov rcx, rbx ; hModule
.text:0000000140013B71 call cs:__imp_GetProcAddress
There's also a registry key checked SYSTEM\CurrentControlSet\Control\BitLocker\AutoDE\HSTI:
.text:0000000140013C10 lea r8, ?NGSCB_AUTODE_HSTI_REQUIRED##3QBGB ; "HSTIVerificationRequired"
.text:0000000140013C17 mov [rsp+60h+pcbData], rax ; pcbData
.text:0000000140013C1C lea rdx, ?NGSCB_AUTODE_HSTI_PREREQS##3QBGB ; "SYSTEM\\CurrentControlSet\\Control\\Bit"...
.text:0000000140013C23 lea rax, [rbp+var_1C]
.text:0000000140013C27 mov r9d, 10h ; dwFlags
.text:0000000140013C2D mov [rsp+60h+pvData], rax ; pvData
.text:0000000140013C32 mov rcx, 0FFFFFFFF80000002h ; hkey
.text:0000000140013C39 and [rsp+60h+var_40], 0
.text:0000000140013C3F call cs:__imp_RegGetValueW
and some other functions (NgscbCheckPreventDeviceEncryption, NgscbGetWinReConfiguration, FveCheckTpmCapability, ...) , once again, all from the fveapi.dll module.
So basically the checks are all based on functions from this DLL. It seems that none of them are documented (as far as I can see with a quick search).
I didn't find anything around in the DeviceEncryptionDataPoints caller (which is basically the main() function), since the next calls are dealing with checking the hypervisor capabilities.

What is more efficient in this case, using const char* or std::string

I am using a combination of C and C++ code in my application.
I want to print out if a boolean flag is true or false as below, by using a ternary operator to determine the string to print.
If I use a const char*, doesn't the compiler more than likely store these string literals "Yes" and "No" in some read-only memory before the program starts.
If I use std::string, when the string goes out of scope, it will be destroyed? But I guess the complier still needs to store the string literals "Yes" and "No" somewhere anyways? I'm not sure.
bool isSet = false;
// More code
//std::string isSetStr = isSet ? "Yes" : "No";
const char* isSetStr = isSet ? "Yes" : "No";
//printf ( "Flag is set ? : %s\n", isSetStr.c_str());
printf ( "Flag is set ? : %s\n", isSetStr);
Either version will allocate the string literals themselves in read-only memory. Either version uses a local variable that goes out of scope, but the string literals remain since they aren't stored locally.
Regarding performance, C++ container classes are almost always going to be more inefficient than "raw" C. When testing your code with g++ -O3 I get this:
// Prints whether the flag is set, choosing between two string literals
// through a plain const char* (the pointer is assigned; nothing is copied).
void test_cstr (bool isSet)
{
const char* isSetStr = isSet ? "Yes" : "No";
printf ( "Flag is set ? : %s\n", isSetStr);
}
Disassembly (x86):
.LC0:
.string "Yes"
.LC1:
.string "No"
.LC2:
.string "Flag is set ? : %s\n"
test_cstr(bool):
test dil, dil
mov eax, OFFSET FLAT:.LC1
mov esi, OFFSET FLAT:.LC0
mov edi, OFFSET FLAT:.LC2
cmove rsi, rax
xor eax, eax
jmp printf
The string literals are loaded into read-only locations and the isSetStr variable is simply optimized away.
Now try this using the same compiler and options (-O3):
// Prints whether the flag is set, but first constructs a local std::string
// from the chosen literal and passes its c_str() to printf.
void test_cppstr (bool isSet)
{
// The std::string is a local object: it is built here and destroyed on return.
std::string isSetStr = isSet ? "Yes" : "No";
printf ( "Flag is set ? : %s\n", isSetStr.c_str());
}
Disassembly (x86):
.LC0:
.string "Yes"
.LC1:
.string "No"
.LC2:
.string "Flag is set ? : %s\n"
test_cppstr(bool):
push r12
mov eax, OFFSET FLAT:.LC1
push rbp
push rbx
mov ebx, OFFSET FLAT:.LC0
sub rsp, 32
test dil, dil
cmove rbx, rax
lea rbp, [rsp+16]
mov QWORD PTR [rsp], rbp
mov rdi, rbx
call strlen
xor edx, edx
mov esi, eax
test eax, eax
je .L7
.L6:
mov ecx, edx
add edx, 1
movzx edi, BYTE PTR [rbx+rcx]
mov BYTE PTR [rbp+0+rcx], dil
cmp edx, esi
jb .L6
.L7:
mov QWORD PTR [rsp+8], rax
mov edi, OFFSET FLAT:.LC2
mov BYTE PTR [rsp+16+rax], 0
mov rsi, QWORD PTR [rsp]
xor eax, eax
call printf
mov rdi, QWORD PTR [rsp]
cmp rdi, rbp
je .L1
call operator delete(void*)
.L1:
add rsp, 32
pop rbx
pop rbp
pop r12
ret
mov r12, rax
jmp .L4
test_cppstr(bool) [clone .cold]:
.L4:
mov rdi, QWORD PTR [rsp]
cmp rdi, rbp
je .L5
call operator delete(void*)
.L5:
mov rdi, r12
call _Unwind_Resume
The string literals are still allocated in read-only memory so that part is the same. But we got a massive chunk of overhead bloat code.
But on the other hand, the biggest bottleneck by far in this case is the console I/O so the performance of the rest of the code isn't even relevant. Strive to write the most readable code possible and only optimize when you actually need it. Manual string handling in C is fast, but it's also very error-prone and cumbersome.
You can test it with godbolt.
The former (using const char*) gives this:
.LC0:
.string "No"
.LC1:
.string "Yes"
.LC2:
.string "Flag is set ? : %s\n"
a(bool):
test dil, dil
mov eax, OFFSET FLAT:.LC0
mov esi, OFFSET FLAT:.LC1
cmove rsi, rax
mov edi, OFFSET FLAT:.LC2
xor eax, eax
jmp printf
The latter (using std::string) gives this:
.LC0:
.string "Yes"
.LC1:
.string "No"
.LC2:
.string "Flag is set ? : %s\n"
a(bool):
push r12
push rbp
mov r12d, OFFSET FLAT:.LC1
push rbx
mov esi, OFFSET FLAT:.LC0
sub rsp, 32
test dil, dil
lea rax, [rsp+16]
cmovne r12, rsi
or rcx, -1
mov rdi, r12
mov QWORD PTR [rsp], rax
xor eax, eax
repnz scasb
not rcx
lea rbx, [rcx-1]
mov rbp, rcx
cmp rbx, 15
jbe .L3
mov rdi, rcx
call operator new(unsigned long)
mov QWORD PTR [rsp+16], rbx
mov QWORD PTR [rsp], rax
.L3:
cmp rbx, 1
mov rax, QWORD PTR [rsp]
jne .L4
mov dl, BYTE PTR [r12]
mov BYTE PTR [rax], dl
jmp .L5
.L4:
test rbx, rbx
je .L5
mov rdi, rax
mov rsi, r12
mov rcx, rbx
rep movsb
.L5:
mov rax, QWORD PTR [rsp]
mov QWORD PTR [rsp+8], rbx
mov edi, OFFSET FLAT:.LC2
mov BYTE PTR [rax-1+rbp], 0
mov rsi, QWORD PTR [rsp]
xor eax, eax
call printf
mov rdi, QWORD PTR [rsp]
lea rax, [rsp+16]
cmp rdi, rax
je .L6
call operator delete(void*)
jmp .L6
mov rdi, QWORD PTR [rsp]
lea rdx, [rsp+16]
mov rbx, rax
cmp rdi, rdx
je .L8
call operator delete(void*)
.L8:
mov rdi, rbx
call _Unwind_Resume
.L6:
add rsp, 32
xor eax, eax
pop rbx
pop rbp
pop r12
ret
Using std::string_view such as:
#include <stdio.h>
#include <string_view>
// Prints whether the flag is set, referring to the chosen literal through a
// std::string_view.
// NOTE(review): declared to return int but has no return statement - confirm
// whether void was intended.
int a(bool isSet) {
// More code
std::string_view isSetStr = isSet ? "Yes" : "No";
//const char* isSetStr = isSet ? "Yes" : "No";
// Passing data() to %s relies on the view having been built from a
// NUL-terminated string literal, as it is here.
printf ( "Flag is set ? : %s\n", isSetStr.data());
//printf ( "Flag is set ? : %s\n", isSetStr);
}
gives:
.LC0:
.string "No"
.LC1:
.string "Yes"
.LC2:
.string "Flag is set ? : %s\n"
a(bool):
test dil, dil
mov eax, OFFSET FLAT:.LC0
mov esi, OFFSET FLAT:.LC1
cmove rsi, rax
mov edi, OFFSET FLAT:.LC2
xor eax, eax
jmp printf
So to sum up, both const char* and string_view gives optimal code. string_view is a bit more code to type compared to const char*.
std::string is made to manipulate string content, so it's overkill here and leads to less efficient code.
Another remark with string_view: It does not guarantee that the string is NUL terminated. In this case, it is, since it's built from a NUL terminated static string. For a generic string_view usage with printf, use printf("%.*s", str.length(), str.data());
EDIT: By disabling exception handling, you can reduce std::string version to:
.LC0:
.string "Yes"
.LC1:
.string "No"
.LC2:
.string "Flag is set ? : %s\n"
a(bool):
push r12
mov eax, OFFSET FLAT:.LC1
push rbp
mov ebp, OFFSET FLAT:.LC0
push rbx
sub rsp, 32
test dil, dil
cmove rbp, rax
lea r12, [rsp+16]
mov QWORD PTR [rsp], r12
mov rdi, rbp
call strlen
mov rsi, rbp
mov rdi, r12
lea rdx, [rbp+0+rax]
mov rbx, rax
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_S_copy_chars(char*, char const*, char const*)
mov rax, QWORD PTR [rsp]
mov QWORD PTR [rsp+8], rbx
mov edi, OFFSET FLAT:.LC2
mov BYTE PTR [rax+rbx], 0
mov rsi, QWORD PTR [rsp]
xor eax, eax
call printf
mov rdi, rsp
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_dispose()
add rsp, 32
pop rbx
pop rbp
pop r12
ret
which is still a lot more than the string_view's version. Remark that the compiler was smart enough to remove the memory allocation on the heap here, but it's still forced to compute the string's length (even if printf will also compute it itself).
Chill out!
The printf will be orders of magnitude slower than any construction of a std::string from const char[] data embedded in the program source code.
Always use a profiler when examining code performance. Writing a small program in an attempt to test a hypothesis will often fail to tell you anything about what is happening in your big program. In the case you present, a good compiler will optimise to
int main(){printf ( "Flag is set ? : No\n");}
String literals have static storage duration, They are alive until program ends.
Note that if a program uses the same string literal in more than one place, the compiler is not required to store that string literal as a single object.
That is this expression
"Yes" == "Yes"
can yield either true or false depending on compiler options. But usually, by default, identical string literals are stored as one string literal.
Objects of type std::string that are declared inside a block (that is, not at namespace scope) and without the keyword static have automatic storage duration. This means that such an object is created anew each time control enters the block and destroyed each time control leaves it.
Type of isSet ? "Yes" : "No" is const char*, independently of the fact that you store it inside std::string or a const char* (or std::stringview, or ...). (so string literals are treated equally by the compiler).
According to quick-bench.com,
std::string version is ~6 times slower, which is understandable as it requires extra dynamic allocation.
Unless you need the extra feature of std::string, you might stay with const char*.
Equivalent C++ code:
#include <string>
using namespace std::string_literals;
// Variant that selects between two std::string values written with the
// "..."s literal suffix and binds the chosen one to a const reference.
void test_cppstr (bool isSet)
{
const std::string& isSetStr = isSet ? "Yes"s : "No"s;
printf ( "Flag is set ? : %s\n", isSetStr.c_str());
}
Efficient almost like C version.
Edit: This version has small overhead with setup/exit, but has same efficiency as C code in calling printf.
#include <string>
using namespace std::string_literals;
// The two strings are namespace-scope objects constructed once, not per call.
const std::string yes("Yes");
const std::string no("No");
// Prints whether the flag is set by binding a reference to one of the two
// pre-built strings above; no std::string is constructed inside the function.
void test_cppstr (bool isSet)
{
const std::string& isSetStr = isSet ? yes : no;
printf ( "Flag is set ? : %s\n", isSetStr.c_str());
}
https://godbolt.org/z/v3ebcsrYE

In NASM, I want to compare an input keystroke based on its ASCII value

A simple program I am working on (for Homework) requires that I take a keystroke as input and return the categories it falls under (is it a printable charater, decimal, etc..)
I'm using cmp to compare the keystroke against the maximum and/or minimum values of its category (for example, if the ASCII code of the keystroke is above 0x7F then it is a printable character).
However, there is obviously something wrong with my comparison, since no matter what I enter (for example, when I use the Escape key as input), it never prints "Control Key".
Could it be that keys need some more processing before they can be compared based on ASCII value?
Here is my code
segment .data
controlKey: db "Control Key", 10
controlLen: equ $-controlKey
printableKey: db "Printable", 10
printableLen: equ $-printableKey
decimalKey: db "Decimal", 10
decimalLen: equ $-decimalKey
segment .bss
key resb 2
segment .text
global main
;-----------------------------------------------------------------------
; main - read one key from stdin and print each category it belongs to
; ABI:   32-bit Linux, int 0x80 (eax = call number, ebx/ecx/edx = args)
; Uses:  key (2-byte input buffer) and the controlKey / printableKey /
;        decimalKey messages with their *Len lengths
;-----------------------------------------------------------------------
main:
        mov     eax, 3                  ; system call 3 to get input
        mov     ebx, 0                  ; standard input device
        mov     ecx, key                ; buffer that receives the input
        mov     edx, 2                  ; the key plus the newline that follows it
        int     0x80
        cmp     eax, 0
        jle     exit                    ; EOF or error: there is no key to classify

control:                                ; check if it's a control key
        movzx   edx, byte [key]         ; load the character itself; "mov edx, key"
                                        ; would load the address of the buffer
        cmp     edx, 31                 ; highest control key
        ja      printable
        mov     eax, 4
        mov     ebx, 1
        mov     ecx, controlKey
        mov     edx, controlLen
        int     0x80
        jmp     exit                    ; it's not any of the other categories

printable:                              ; tell that it's a printable symbol
        mov     eax, 4
        mov     ebx, 1
        mov     ecx, printableKey
        mov     edx, printableLen
        int     0x80

decimal:
        movzx   edx, byte [key]         ; reload: edx held the message length above
        cmp     edx, 30h                ; smallest decimal ASCII ('0')
        jb      uppercase
        cmp     edx, 39h                ; test against '9'
        ja      uppercase
        mov     eax, 4
        mov     ebx, 1
        mov     ecx, decimalKey
        mov     edx, decimalLen
        int     0x80

uppercase:                              ; no checks here yet: both labels fall
lowercase:                              ; through to echoing the key
        mov     eax, 4                  ; system call 4 for output
        mov     ebx, 1                  ; standard output device
        mov     ecx, key                ; address of the key that was read
        mov     edx, 1                  ; write one byte
        int     0x80

exit:
        mov     eax, 1                  ; system call 1 (exit), status 0
        xor     ebx, ebx
        int     0x80
The Escape key won't be read by your application, since it is - most probably - caught by the terminal that your application runs in. I can see that you're using the read syscall in your code, which is, of course, fine, but you should remember that this function only provides reading from a file descriptor, which doesn't necessarily have to contain all the control signals sent from the keyboard. The file descriptor (stdin) doesn't even have to come from the keyboard, since a file might be redirected to your process as standard input.
I don't know if there's a good way of achieving (capturing keystrokes, not the data that they represent - and this is what you're doing now) what you're trying to do just with system calls in Linux. You could try using some terminal controlling library, for example ncurses or termios, but I guess that isn't a part of your assignment.
I have done this a while back, here is a sample to show how to turn character echo on/off, and canonical mode on/off. When run, when you press a key, the keycode will be displayed on the screen, the program will exit once shift+q is pressed:
terminos.asm
ICANON equ 1<<1
ECHO equ 1<<3
sys_exit equ 1
sys_read equ 3
sys_write equ 4
stdin equ 0
stdout equ 1
global _start
SECTION .bss
lpBufIn     resb 2                      ; one key read from stdin
; dwtoa writes an optional sign, up to 10 digits and a terminating NUL here
; (12 bytes at most).  With only 2 bytes reserved, any number of two or
; more digits ran past the buffer into termios.
lpBufOut    resb 16
termios     resb 36                     ; buffer passed to the ioctl calls
section .text
;~ _start - switch the terminal to no-echo, non-canonical mode, print the
;~          code of every key read, and stop after code 81 ("Q") is seen;
;~          echo and canonical mode are switched back on before exiting
_start:
        call    echo_off
        call    canonical_off

.next_key:
        call    GetKeyCode              ; one byte -> lpBufIn
        movzx   esi, byte [lpBufIn]     ; PrintNum takes the number in esi
        call    PrintNum
        cmp     byte [lpBufIn], 81      ; 81 = "Q" (shift+q)
        jne     .next_key

        call    echo_on
        call    canonical_on

        mov     ebx, 0                  ; exit status 0
        mov     eax, sys_exit
        int     80H
;~ #########################################
;~ GetKeyCode - read a single byte from stdin into lpBufIn
;~ returns with eax holding the result of the read system call
GetKeyCode:
        mov     edx, 1                  ; one byte
        mov     ecx, lpBufIn            ; destination
        mov     ebx, stdin
        mov     eax, sys_read
        int     80h
        ret
;~ #########################################
;~ canonical_off - fetch the terminal settings, clear the ICANON bit in the
;~                 local mode flags at termios+12, and write them back
canonical_off:
        call    read_stdin_termios
        and     dword [termios+12], ~ICANON
        call    write_stdin_termios
        ret
;~ #########################################
;~ echo_off - fetch the terminal settings, clear the ECHO bit in the
;~            local mode flags at termios+12, and write them back
echo_off:
        call    read_stdin_termios
        and     dword [termios+12], ~ECHO
        call    write_stdin_termios
        ret
;~ #########################################
;~ canonical_on - fetch the terminal settings, set the ICANON bit in the
;~                local mode flags at termios+12, and write them back
canonical_on:
        call    read_stdin_termios
        mov     eax, ICANON
        or      [termios+12], eax
        call    write_stdin_termios
        ret
;~ #########################################
;~ echo_on - fetch the terminal settings, set the ECHO bit in the
;~           local mode flags at termios+12, and write them back
echo_on:
        call    read_stdin_termios
        mov     eax, ECHO
        or      [termios+12], eax
        call    write_stdin_termios
        ret
;~ #########################################
;~ read_stdin_termios - issue system call 36h (ioctl on 32-bit Linux) on stdin
;~                      with request 5401h (TCGETS), filling the termios buffer
read_stdin_termios:
        mov     edx, termios            ; buffer that receives the settings
        mov     ecx, 5401h              ; request code
        mov     ebx, stdin
        mov     eax, 36h                ; system call number
        int     80h
        ret
;~ #########################################
;~ write_stdin_termios - issue system call 36h (ioctl on 32-bit Linux) on stdin
;~                       with request 5402h (TCSETS), applying the termios buffer
write_stdin_termios:
        mov     edx, termios            ; settings to apply
        mov     ecx, 5402h              ; request code
        mov     ebx, stdin
        mov     eax, 36h                ; system call number
        int     80h
        ret
;~ PrintNum - print the number in esi as decimal text followed by a newline
;~ in:  esi = number to print
;~ uses lpBufOut as the conversion buffer; eax, ebx, ecx, edx, edi are overwritten
PrintNum:
        push    lpBufOut                ; dwtoa argument 2: destination buffer
        push    esi                     ; dwtoa argument 1: number to convert
        call    dwtoa                   ; dwtoa pops both arguments itself

        mov     edi, lpBufOut
        call    GetStrlen               ; edx = length; terminator becomes a newline
        inc     edx                     ; include that newline in the write

        mov     ebx, stdout
        mov     ecx, lpBufOut
        mov     eax, sys_write
        int     80H
        ret
;~ #########################################
;~ GetStrlen - measure a NUL-terminated string and replace its terminator
;~             with a newline (byte 10)
;~ in:  edi = address of the string
;~ out: edx = length without the terminator
;~      ecx = length including the terminator
;~      edi = address just past the (replaced) terminator, eax = 0
GetStrlen:
        xor     eax, eax                ; al = byte to search for (NUL)
        mov     ecx, -1                 ; no upper bound on the scan
        cld                             ; scan forwards
        repne   scasb                   ; stops one byte past the terminator
        mov     byte [edi - 1], 10      ; overwrite the terminator with a newline
        not     ecx                     ; ecx = bytes scanned, terminator included
        lea     edx, [ecx - 1]
        ret
;~ #########################################
;~ dwtoa - convert a signed 32-bit integer to NUL-terminated decimal text
;~   [ebp + 8]  = number to convert
;~   [ebp + 12] = pointer to buffer that receives number
;~ The routine removes both arguments from the stack itself (ret 8).
;~ ebx, esi and edi are preserved; eax, ecx and edx are overwritten.
dwtoa:
        push    ebp
        mov     ebp, esp
        push    ebx
        push    esi
        push    edi

        mov     edi, [ebp + 12]         ; edi = write position in the buffer
        mov     eax, [ebp + 8]          ; eax = value still to be converted
        test    eax, eax
        jz      .zero
        jns     .digits                 ; flags still come from the test above
        mov     byte [edi], "-"         ; negative: emit the sign, continue
        inc     edi                     ; with the magnitude
        neg     eax

.digits:
        mov     esi, edi                ; esi = position of the first digit
        mov     ebx, 10
.next_digit:
        xor     edx, edx
        div     ebx                     ; eax = quotient, edx = value mod 10
        add     dl, "0"
        mov     [edi], dl               ; digits are produced lowest first
        inc     edi
        cmp     eax, 0
        jg      .next_digit
        mov     byte [edi], 0           ; terminate the string

.reverse:                               ; swap the ends towards the middle so
        dec     edi                     ; the highest digit comes first
        mov     dl, [esi]
        mov     cl, [edi]
        mov     [esi], cl
        mov     [edi], dl
        inc     esi
        cmp     esi, edi
        jl      .reverse
        jmp     .done

.zero:
        mov     byte [edi], "0"         ; the value 0 is written as "0" directly
        mov     byte [edi + 1], 0

.done:
        pop     edi
        pop     esi
        pop     ebx
        leave
        ret     4 * 2
makefile
# Builds the 32-bit terminos example.
# Recipe lines must start with a tab character.
APP = terminos

# nasm -f elf emits a 32-bit ELF object, so the linker is told to produce
# an i386 executable (needed when linking on a 64-bit host).
$(APP): $(APP).o
	ld -m elf_i386 -o $(APP) $(APP).o

$(APP).o: $(APP).asm
	nasm -f elf $(APP).asm