Related
I have this test program, using a #define constant:
#include <stdio.h>
/* FOO is a preprocessor macro: each use is replaced by the literal 1
   before the compiler proper sees the code. */
#define FOO 1
/* Prints the value of FOO followed by a newline, then returns 0. */
int main()
{
printf("%d\n", FOO);
return 0;
}
When compiled with “Apple LLVM version 10.0.0 (clang-1000.11.45.5)”, I get an executable of 8432 bytes. Here is the assembly listing:
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 10, 14
.globl _main ## -- Begin function main
.p2align 4, 0x90
_main: ## #main
.cfi_startproc
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq L_.str(%rip), %rdi
movl $1, %esi
movl $0, -4(%rbp)
movb $0, %al
callq _printf
xorl %esi, %esi
movl %eax, -8(%rbp) ## 4-byte Spill
movl %esi, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
## -- End function
.section __TEXT,__cstring,cstring_literals
L_.str: ## #.str
.asciz "%d\n"
.subsections_via_symbols
Now I replace #define FOO 1 with const int FOO = 1;. The executable is now 8464 bytes and the assembly listing looks like this:
.section __TEXT,__text,regular,pure_instructions
.build_version macos, 10, 14
.globl _main ## -- Begin function main
.p2align 4, 0x90
_main: ## #main
.cfi_startproc
## %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
leaq L_.str(%rip), %rdi
movl $1, %esi
movl $0, -4(%rbp)
movb $0, %al
callq _printf
xorl %esi, %esi
movl %eax, -8(%rbp) ## 4-byte Spill
movl %esi, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
## -- End function
.section __TEXT,__const
.globl _FOO ## #FOO
.p2align 2
_FOO:
.long 1 ## 0x1
.section __TEXT,__cstring,cstring_literals
L_.str: ## #.str
.asciz "%d\n"
.subsections_via_symbols
So it actually declared a FOO variable, making the executable 32 bytes bigger.
I get the same result with -O3 optimization level.
Why is that? Normally, the compiler should be intelligent enough to optimize and add the constant to the symbol table instead of taking up storage for it.
This is another case where the difference between C and C++ matters.
In C, const int FOO has external linkage and must thus be included in the binary.
Compiling with g++ or clang++ instead gives you the desired optimization as FOO has internal linkage in C++.
You can achieve the optimization in C mode by explicitly requesting internal linkage for FOO via
static const int FOO = 1;
Both clang and gcc with link-time optimization enabled (-flto) also manage to strip away the unused symbol, even when linkage is external. (Live with and without LTO.)
The fact that you use the variable FOO in your second program means that it has to live somewhere, so the compiler needs to allocate it somewhere.
In the #define case, there is no variable - the pre-processor substituted the text "FOO" with the text "1" and so the call to printf() was passed a constant value, not a variable.
I would like to know how to implement these lines of code in x86 MASM assembly:
if (x >= 1 && x <= 100) {
printsomething1();
} else if (x >= 101 && x <= 200) {
printsomething2();
} else {
printsomething3();
}
I'd break it into contiguous ranges, (assuming x is unsigned) like:
x is 0, do printsomething3()
x is 1 to 100, do printsomething1()
x is 101 to 200, do printsomething2()
x is 201 or higher, do printsomething3()
Then work from lowest to highest, like:
; In: eax = x, compared as an unsigned value.
; The ranges are tested from lowest to highest, so each test only needs
; the upper bound of its range.
        test    eax, eax                ; x == 0 ?
        jz      .printsomething3        ;   yes: below the first range
        cmp     eax, 100
        jbe     .printsomething1        ; 1 <= x <= 100
        cmp     eax, 200
        jbe     .printsomething2        ; 101 <= x <= 200
        jmp     .printsomething3        ; x >= 201
If the only difference is the string they print (and not the code they use to print it) I'd go one step further:
; In:  eax = x, compared as an unsigned value.
; Out: esi = address of the string chosen for x; control goes to .print.
; mov leaves the flags untouched, so esi can be loaded before each test.
        mov     esi, something3         ; candidate for x == 0
        test    eax, eax
        jz      .print
        mov     esi, something1         ; candidate for 1 <= x <= 100
        cmp     eax, 100
        jbe     .print
        mov     esi, something2         ; candidate for 101 <= x <= 200
        cmp     eax, 200
        jbe     .print
        mov     esi, something3         ; x >= 201
        jmp     .print
If you have access to a decent C compiler, you can compile it into assembly language. For gcc use the -S flag:
gcc test.c -S
This creates the file test.s which contains the assembly language output which can be assembled and linked if needed.
For example, to make your code compile successfully, I rewrote it slightly to this:
#include <stdio.h>
#include <stdlib.h>
void printsomething (int y)
{
printf ("something %d", y);
}
/* Dispatches on the range x falls into:
     1..100   -> printsomething(1)
     101..200 -> printsomething(2)
     anything else (including x <= 0 and x > 200) -> printsomething(3) */
void func (int x)
{
if (x >= 1 && x <= 100)
printsomething(1);
else
if (x >= 101 && x <= 200)
printsomething(2);
else
printsomething(3);
}
/* Converts argv[1] to an int with atoi when an argument is supplied
   (x stays 0 otherwise), then returns 0.
   Note: as written, x is never passed to func(). */
int main (int argc, char **argv)
{
int x = 0;
if (argc > 1)
x = atoi (argv [1]);
return 0;
}
It compiles into this assembler:
.file "s.c"
.text
.section .rodata
.LC0:
.string "something %d"
.text
.globl printsomething
.type printsomething, #function
printsomething:
.LFB5:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
movl %eax, %esi
movl $.LC0, %edi
movl $0, %eax
call printf
nop
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE5:
.size printsomething, .-printsomething
.globl func
.type func, #function
func:
.LFB6:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, -4(%rbp)
cmpl $0, -4(%rbp)
jle .L3
cmpl $100, -4(%rbp)
jg .L3
movl $1, %edi
call printsomething
jmp .L4
.L3:
cmpl $100, -4(%rbp)
jle .L5
cmpl $200, -4(%rbp)
jg .L5
movl $2, %edi
call printsomething
jmp .L4
.L5:
movl $3, %edi
call printsomething
.L4:
nop
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE6:
.size func, .-func
.globl main
.type main, #function
main:
.LFB7:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movl %edi, -20(%rbp)
movq %rsi, -32(%rbp)
movl $0, -4(%rbp)
cmpl $1, -20(%rbp)
jle .L7
movq -32(%rbp), %rax
addq $8, %rax
movq (%rax), %rax
movq %rax, %rdi
call atoi
movl %eax, -4(%rbp)
.L7:
movl $0, %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE7:
.size main, .-main
.ident "GCC: (GNU) 7.3.1 20180712 (Red Hat 7.3.1-6)"
.section .note.GNU-stack,"",#progbits
Examine the func: part of it and you'll see how it sets up the comparisons with 1, 100, 101, etc.
I've created following program :
// Collects characters into a fixed-size token buffer.
class CLexer
{
public:
// Starts with the cursor at index 0.
CLexer( ) {
iCursorPos = 0;
}
// Stores `character` at the cursor and advances the cursor by one.
// A space or newline instead writes '\0' at the cursor and resets the
// cursor to 0.
// NOTE: iCursorPos is not checked against the size of m_strToken before
// either write.
void putCharacter(char character)
{
if(character != ' ' && character != '\n') {
m_strToken[iCursorPos] = character;
iCursorPos++;
}
else {
m_strToken[iCursorPos] = '\0';
iCursorPos = 0;
}
}
private:
// Token buffer written by putCharacter.
char m_strToken[1024];
// Index of the next write into m_strToken.
int iCursorPos = 0;
};
// Creates a CLexer on the stack, feeds it the single character 'm',
// and returns 0.
int main(int argc, char * argv[]) {
CLexer lex;
lex.putCharacter('m');
return 0;
}
Assembler output produced by compiler :
.file "main.cpp"
.section .text._ZN6CLexerC2Ev,"axG",#progbits,_ZN6CLexerC5Ev,comdat
.align 2
.weak _ZN6CLexerC2Ev
.type _ZN6CLexerC2Ev, #function
_ZN6CLexerC2Ev:
.LFB1:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $0, 1024(%rax)
movq -8(%rbp), %rax
movl $0, 1024(%rax)
nop
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size _ZN6CLexerC2Ev, .-_ZN6CLexerC2Ev
.weak _ZN6CLexerC1Ev
.set _ZN6CLexerC1Ev,_ZN6CLexerC2Ev
.section .text._ZN6CLexer12putCharacterEc,"axG",#progbits,_ZN6CLexer12putCharacterEc,comdat
.align 2
.weak _ZN6CLexer12putCharacterEc
.type _ZN6CLexer12putCharacterEc, #function
_ZN6CLexer12putCharacterEc:
.LFB3:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movl %esi, %eax
movb %al, -12(%rbp)
cmpb $32, -12(%rbp)
je .L3
cmpb $10, -12(%rbp)
je .L3
movq -8(%rbp), %rax
movl 1024(%rax), %eax
movq -8(%rbp), %rdx
cltq
movzbl -12(%rbp), %ecx
movb %cl, (%rdx,%rax)
movq -8(%rbp), %rax
movl 1024(%rax), %eax
leal 1(%rax), %edx
movq -8(%rbp), %rax
movl %edx, 1024(%rax)
jmp .L4
.L3:
movq -8(%rbp), %rax
movl 1024(%rax), %eax
movq -8(%rbp), %rdx
cltq
movb $0, (%rdx,%rax)
movq -8(%rbp), %rax
movl $0, 1024(%rax)
.L4:
nop
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3:
.size _ZN6CLexer12putCharacterEc, .-_ZN6CLexer12putCharacterEc
.text
.globl main
.type main, #function
main:
.LFB4:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $1056, %rsp
movl %edi, -1044(%rbp)
movq %rsi, -1056(%rbp)
leaq -1040(%rbp), %rax
movq %rax, %rdi
call _ZN6CLexerC1Ev
leaq -1040(%rbp), %rax
movl $109, %esi
movq %rax, %rdi
call _ZN6CLexer12putCharacterEc
movl $0, %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE4:
.size main, .-main
.ident "GCC: (GNU) 6.1.1 20160501"
.section .note.GNU-stack,"",#progbits
And after execution, first call to putCharacter method with 'm' character as parameter is throwing segfault.
Attached gdb is giving following output :
Program received signal SIGSEGV, Segmentation fault.
0x00000000004018e5 in CLexer::putCharacter (this=0x7fffffffe370,
character=109 'm') at src/main.cpp:60
60 m_strToken[iCursorPos] = character;
I've managed to fix this error by moving the iCursorPos variable above m_strToken in the class declaration, but I think it isn't the proper way to fix this issue.
I'm using g++ (GCC) 6.1.1 20160501 on the latest, fully updated version of ArchLinux x86_64.
if(character != ' ' && character != '\n') {
m_strToken[iCursorPos] = character;
iCursorPos++;
}
You don't check that iCursorPos < 1024 here. So you write past the end of the buffer, into iCursorPos itself.
The next access m_strToken[iCursorPos] = character; probably writes way past the end of the buffer, and you get a segfault (luckily).
Your "fix" still isn't correct, since you corrupt other parts of your object's memory regardless.
I already know that the new[] operator first allocates memory and then calls the constructor for each element and that the delete[] operator first calls the destructor for each element and then frees memory and, because of that, they both have an O(n) time complexity.
But if I have a class, for which I have not defined any constructor/destructor, will the complexity still be O(n), or will it be just O(1)?
For instance, if I have two classes:
// Example class with a user-provided constructor and destructor.
class foo
{
public:
int a;
// Sets a to 0.
foo()
{
a = 0;
// more stuff
}
// Sets a to 1.
~foo()
{
a = 1;
// some useful stuff here
}
};
// Example class with a single int member and no user-declared
// constructor or destructor.
class boo
{
public:
int a;
};
And I create two arrays of them like this:
int n = 1000;
foo* pfoo = new foo[n];
boo* pboo = new boo[n];
I'm pretty sure the first new call will have an O(n) complexity, but what about the second? Will new just allocate the necessary memory and that's it, or will it call some default constructor (I'm not sure if such a thing actually exists in C++) for each element?
And the same question for delete:
delete [] pfoo;
delete [] pboo;
When I delete the second array will the complexity still be O(n), or will delete just deallocate the memory in O(1) complexity?
When you don't know, it's a great idea to look at the assembly output. For example, let's assume this is the code to compare.
// Example class with a user-provided constructor and destructor.
class foo
{
public:
int a;
// Sets a to 0.
foo()
{
a = 0;
// more stuff
}
// Sets a to 1.
~foo()
{
a = 1;
// some useful stuff here
}
};
// Example class with a single int member and no user-declared
// constructor or destructor.
class boo
{
public:
int a;
};
// Applies delete[] to pfoo.
// Presumably pfoo was obtained from new foo[n] - confirm at the call site.
void remove_foo(foo* pfoo) {
delete [] pfoo;
}
// Applies delete[] to pboo.
// Presumably pboo was obtained from new boo[n] - confirm at the call site.
void remove_boo(boo *pboo) {
delete [] pboo;
}
When compiling with optimizations using gcc (Clang gives similar output), you get the following result.
.file "deleter.cpp"
.text
.p2align 4,,15
.globl _Z10remove_fooP3foo
.type _Z10remove_fooP3foo, #function
_Z10remove_fooP3foo:
.LFB6:
.cfi_startproc
testq %rdi, %rdi
je .L1
movq -8(%rdi), %rax
leaq (%rdi,%rax,4), %rax
cmpq %rax, %rdi
je .L4
.p2align 4,,10
.p2align 3
.L6:
subq $4, %rax
movl $1, (%rax)
cmpq %rax, %rdi
jne .L6
.L4:
subq $8, %rdi
jmp _ZdaPv
.p2align 4,,10
.p2align 3
.L1:
rep ret
.cfi_endproc
.LFE6:
.size _Z10remove_fooP3foo, .-_Z10remove_fooP3foo
.p2align 4,,15
.globl _Z10remove_booP3boo
.type _Z10remove_booP3boo, #function
_Z10remove_booP3boo:
.LFB7:
.cfi_startproc
testq %rdi, %rdi
je .L8
jmp _ZdaPv
.p2align 4,,10
.p2align 3
.L8:
rep ret
.cfi_endproc
.LFE7:
.size _Z10remove_booP3boo, .-_Z10remove_booP3boo
.ident "GCC: (SUSE Linux) 4.8.1 20130909 [gcc-4_8-branch revision 202388]"
.section .note.GNU-stack,"",#progbits
It's easy to tell that for foo it calls destructor, but for boo it directly calls delete [] function (_ZdaPv after name mangling). This also happens without optimizations. The code is longer, because methods are actually output, but it's still noticeable that delete [] is called directly for boo.
.file "deleter.cpp"
.section .text._ZN3fooD2Ev,"axG",#progbits,_ZN3fooD5Ev,comdat
.align 2
.weak _ZN3fooD2Ev
.type _ZN3fooD2Ev, #function
_ZN3fooD2Ev:
.LFB4:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $1, (%rax)
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE4:
.size _ZN3fooD2Ev, .-_ZN3fooD2Ev
.weak _ZN3fooD1Ev
.set _ZN3fooD1Ev,_ZN3fooD2Ev
.text
.globl _Z10remove_fooP3foo
.type _Z10remove_fooP3foo, #function
_Z10remove_fooP3foo:
.LFB6:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %rbx
subq $24, %rsp
.cfi_offset 3, -24
movq %rdi, -24(%rbp)
cmpq $0, -24(%rbp)
je .L3
movq -24(%rbp), %rax
subq $8, %rax
movq (%rax), %rax
leaq 0(,%rax,4), %rdx
movq -24(%rbp), %rax
leaq (%rdx,%rax), %rbx
.L6:
cmpq -24(%rbp), %rbx
je .L5
subq $4, %rbx
movq %rbx, %rdi
call _ZN3fooD1Ev
jmp .L6
.L5:
movq -24(%rbp), %rax
subq $8, %rax
movq %rax, %rdi
call _ZdaPv
.L3:
addq $24, %rsp
popq %rbx
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE6:
.size _Z10remove_fooP3foo, .-_Z10remove_fooP3foo
.globl _Z10remove_booP3boo
.type _Z10remove_booP3boo, #function
_Z10remove_booP3boo:
.LFB7:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %rdi, -8(%rbp)
cmpq $0, -8(%rbp)
je .L7
movq -8(%rbp), %rax
movq %rax, %rdi
call _ZdaPv
.L7:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE7:
.size _Z10remove_booP3boo, .-_Z10remove_booP3boo
.ident "GCC: (SUSE Linux) 4.8.1 20130909 [gcc-4_8-branch revision 202388]"
.section .note.GNU-stack,"",#progbits
This also applies to new []. _Znam is called directly, without constructing objects, even without optimizations.
Generally, user-defined constructors or destructors mean that new [] and delete [] won't run in constant time. But if there are none, the compiler doesn't try to call constructors or destructors for these objects, and they will be POD data types, which means that constructing them is a simple malloc-like call. There are some exceptions (involving various optimizations), but usually code with constructors/destructors will be O(N), and code without them will be O(1), assuming an O(1) new []/delete [] implementation.
It depends on your exact syntax:
// x: array allocated with no initializer.
auto x = new unsigned[2];
// y: array allocated with an empty-parentheses initializer.
auto y = new unsigned[2]();
// Print both elements of each array, one value per line.
::std::cout << x[0] << "\n" << x[1] << "\n" << y[0] << "\n" << y[1] << "\n";
delete[] x;
delete[] y;
gives the output (on my machine):
3452816845
3452816845
0
0
Because one will be default initialized and the other value initialized.
delete[] on the other hand is even simpler to understand: If your data type has a destructor, it will be called. The built in (and thus POD) types generally do not.
MyClass *p = static_cast<MyClass*> (::operator new (sizeof(MyClass[N])));
Allocates memory for N objects and does not construct them. In that way the complexity will be the same as malloc(). It will obviously be faster than allocating and constructing objects of a complex class.
But if I have a class, for which I have not defined any constructor/destructor, will the complexity still be O(n), or will it be just O(1)?
The members themselves might still have destructors. In short, for PODs, delete[] will be O(1).
Consider the following code
#include <stdio.h>
#include <string.h>
/* Takes the address of the const int a, stores it in a long, converts it
   back to a non-const int pointer, writes 3 through that pointer, and
   finally prints a. */
main()
{
const int a = 2;
/* Address of a, carried through an integer type. */
long p = (long)&a;
/* Back to a pointer, now without the const qualifier. */
int *c = (int *)p;
/* Write to a through the non-const pointer. */
*c =3;
printf("%d", a);
}
This code can change the value of a in C but not in C++. I understand that C++ is applying optimization and replacing instances of a with 2. So was this a bug fix in C++ or was the bug fixed by chance due to optimization?
It's undefined behavior to modify a const value no matter directly or indirectly. This may compile in C and may even run without problem on your machine, but it's still undefined behavior.
The difference between C and C++ on this is: with const int a = 2, C++ treats a as a constant expression, for instance, you can use a as array dimension:
int n[a]; //fine in C++
But in C, a is not a constant expression, with the same code:
int n[a]; //VLA in C99
Here n is not a fixed-sized array, but a variable length array.
This is not a C vs C++ issue. By modifying a const value (as well as by double-casting a pointer via a long), you enter the realm of undefined behaviour in both languages. Therefore the difference is simply a matter of how the undefined behaviour chooses to manifest itself.
You are casting away the constness out of &a and modifying the pointed value, which is undefined behavior both in C and in C++ (the trip through long just adds some more gratuitous UB). In C++ your compiler happens to optimize more aggressively the constant, but the point of the situation is unchanged.
Your code generates undefined behavior on C++ since you're accessing memory you shouldn't
#include <stdio.h>
#include <string.h>

/*
 * Illustrates the two problems with the snippet under discussion:
 *
 *  1. long is not guaranteed to be as wide as a pointer, so carrying an
 *     address through a long can truncate it. The first printf shows both
 *     sizes so the mismatch (if any) is visible on the current platform.
 *  2. Writing through a pointer obtained by casting away const from an
 *     object defined const is undefined behaviour. The write is kept on
 *     purpose, because it is the point being demonstrated.
 *
 * Returns 0.
 */
int main(void)
{
    const int a = 2;

    /* sizeof yields a size_t, which is printed with %zu (not %x). */
    printf("%zu != %zu !!", sizeof(long), sizeof(void*)); // e.g. 4 != 8 on 64-bit Windows (LLP64); equal on LP64 systems
    long p = (long)&a;
    int *c = (int *)p;
    *c = 3;             /* undefined behaviour: a is a const object */
    printf("%d", a);
    return 0;
}
and even if it works on a 32 bit system modifying const memory by casting away the constness is undefined behavior in both languages.
The following is the assembly code generated by g++. The compiler statically uses "$2" instead of "a", whereas gcc doesn't perform any such static optimization. I guess there shouldn't be any undefined behaviour.
.Ltext0:
.section .rodata
.LC0:
0000 256400 .string "%d"
.text
.globl main
main:
.LFB0:
.cfi_startproc
.cfi_personality 0x3,__gxx_personality_v0
.cfi_lsda 0x3,.LLSDA0
0000 55 pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
0001 4889E5 movq %rsp, %rbp
.cfi_def_cfa_register 6
0004 4883EC20 subq $32, %rsp
.LBB2:
0008 C745EC02 movl $2, -20(%rbp)
000000
000f 488D45EC leaq -20(%rbp), %rax
0013 488945F0 movq %rax, -16(%rbp)
0017 488B45F0 movq -16(%rbp), %rax
001b 488945F8 movq %rax, -8(%rbp)
001f 488B45F8 movq -8(%rbp), %rax
0023 C7000300 movl $3, (%rax)
0000
0029 488B45F8 movq -8(%rbp), %rax
002d 8B00 movl (%rax), %eax
002f 89C6 movl %eax, %esi
0031 BF000000 movl $.LC0, %edi
00
0036 B8000000 movl $0, %eax
00
.LEHB0:
003b E8000000 call printf
00
0040 BE020000 movl $2, %esi
00
0045 BF000000 movl $.LC0, %edi
00
004a B8000000 movl $0, %eax
00
004f E8000000 call printf
00
.LEHE0:
0054 B8000000 movl $0, %eax
00
0059 EB08 jmp .L5
.L4:
005b 4889C7 movq %rax, %rdi
.LEHB1:
005e E8000000 call _Unwind_Resume
00
.LEHE1:
.L5:
.LBE2:
0063 C9 leave
.cfi_def_cfa 7, 8
0064 C3 ret
.cfi_endproc
.LFE0:
.globl __gxx_personality_v0
.section .gcc_except_table,"a",#progbits
.LLSDA0:
0000 FF .byte 0xff
0001 FF .byte 0xff
0002 01 .byte 0x1
0003 08 .uleb128 .LLSDACSE0-.LLSDACSB0
.LLSDACSB0:
0004 3B .uleb128 .LEHB0-.LFB0
0005 19 .uleb128 .LEHE0-.LEHB0
0006 5B .uleb128 .L4-.LFB0
0007 00 .uleb128 0
0008 5E .uleb128 .LEHB1-.LFB0
0009 05 .uleb128 .LEHE1-.LEHB1
000a 00 .uleb128 0
000b 00 .uleb128 0
.LLSDACSE0:
.text
.Letext0: