I've seen people define a member function like this:
// Out-parameter getter: copies the member m_value into the caller's variable v.
void getValue(int& v)
{
v = m_value;
}
and also like this:
// Return-by-value getter: hands back a copy of the member m_value.
int getValue()
{
return m_value;
}
I guess the first saves memory? Is that the only time you would use the first type of get-function? The second seems a lot more convenient.
I thought I would godbolt it for you
source
#include <iostream>
/// Small value holder used to compare two getter styles: an out-parameter
/// getter (Get1) and a return-by-value getter (Get2).
struct Foof {
    int m_val;

    /// Stores v as the initial value. Left implicit (not `explicit`) so that
    /// existing copy-initialisation from an int keeps compiling.
    Foof(int v) : m_val(v) {}

    /// Increments the stored value when it is greater than 42, otherwise
    /// decrements it.
    void woodle()
    {
        if (m_val > 42)
            m_val++;
        else
            m_val--;
    }

    /// Writes the stored value into v. Const-qualified because it only reads
    /// the object, so it is also callable on a const Foof.
    void Get1(int& v) const
    {
        v = m_val;
    }

    /// Returns a copy of the stored value. Const-qualified for the same
    /// reason as Get1.
    int Get2() const
    {
        return m_val;
    }
};
// Reads two integers from standard input, builds one Foof from each, runs
// woodle() on both, then prints the value fetched through Get1 (out-parameter)
// immediately followed by the value fetched through Get2 (return value).
// The parameters c and v are not used.
int main(int c, char**v){
int q;
std::cin >> q;
Foof f1(q);
std::cin >> q; // q is reused for the second input
Foof f2(q);
f1.woodle();
f2.woodle();
int k;
f1.Get1(k); // k receives f1's value through the reference parameter
int j = f2.Get2(); // j receives f2's value as the return value
std::cout << k << j; // no separator is written between the two numbers
}
the woodle function and the cin to initialize is to make the compiler think a bit
I have 2 foofs otherwise the compiler goes "well I know the answer to this question" when I call Get2 after Get1
Compiled with -O3, i.e. optimize hard. The code comes out as (gcc):
pushq %rbx
movl $_ZSt3cin, %edi
subq $16, %rsp
leaq 12(%rsp), %rsi
call std::basic_istream<char, std::char_traits<char> >::operator>>(int&)
movl 12(%rsp), %ebx
leaq 12(%rsp), %rsi
movl $_ZSt3cin, %edi
call std::basic_istream<char, std::char_traits<char> >::operator>>(int&)
movl 12(%rsp), %eax
movl $_ZSt4cout, %edi
leal 1(%rbx), %edx
cmpl $43, %ebx
leal -1(%rbx), %esi
cmovge %edx, %esi
leal -1(%rax), %ebx
leal 1(%rax), %edx
cmpl $43, %eax
cmovge %edx, %ebx
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
movq %rax, %rdi
movl %ebx, %esi
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
addq $16, %rsp
xorl %eax, %eax
popq %rbx
ret
I separated out the actual calls to Get1 or Get2 you can see that
the generated code is identical
the compiler is very aggressive at optimizing, there are no function calls etc
Lesson, write your code to be human readable and let the compiler do the heavy lifting
Related
Let's say I have this code:
int v;
setV(&v);
for (int i = 0; i < v - 5; i++) {
// Do stuff here, but don't use v.
}
Will the operation v - 5 be run every time or will a modern compiler be smart enough to store it once and never run it again?
What if I did this:
int v;
setV(&v);
const int cv = v;
for (int i = 0; i < cv - 5; i++) {
// Do stuff here. Changing cv is actually impossible.
}
Would the second style make a difference?
Edit:
This was an interesting question for an unexpected reason. It's more a question of the compiler avoiding the obtuse case of an unintended aliasing of v. If the compiler can prove that this won't happen (version 2) then we get better code.
The lesson here is to be more concerned with eliminating aliasing than trying to do the optimiser's job for it.
Making the copy cv actually presented the biggest optimisation (elision of redundant memory fetches), even though at a first glance it would appear to be (slightly) less efficient.
original answer and demo:
Let's see:
given:
extern void setV(int*);
extern void do_something(int i);
// Loop whose bound (v - 5) is written in terms of v itself. The address of v
// has been handed to setV(), which is only declared (extern) here.
void test1()
{
int v;
setV(&v);
for (int i = 0; i < v - 5; i++) {
// Do stuff here, but don't use v.
do_something(i);
}
}
// Same loop as test1, except that the bound (cv - 5) is written in terms of
// cv, a const copy of v taken after setV() returns; the address of cv is
// never taken in this function.
void test2()
{
int v;
setV(&v);
const int cv = v;
for (int i = 0; i < cv - 5; i++) {
// Do stuff here. Changing cv is actually impossible.
do_something(i);
}
}
compile on gcc5.3 with -x c++ -std=c++14 -O2 -Wall
gives:
test1():
pushq %rbx
subq $16, %rsp
leaq 12(%rsp), %rdi
call setV(int*)
cmpl $5, 12(%rsp)
jle .L1
xorl %ebx, %ebx
.L5:
movl %ebx, %edi
addl $1, %ebx
call do_something(int)
movl 12(%rsp), %eax
subl $5, %eax
cmpl %ebx, %eax
jg .L5
.L1:
addq $16, %rsp
popq %rbx
ret
test2():
pushq %rbp
pushq %rbx
subq $24, %rsp
leaq 12(%rsp), %rdi
call setV(int*)
movl 12(%rsp), %eax
cmpl $5, %eax
jle .L8
leal -5(%rax), %ebp
xorl %ebx, %ebx
.L12:
movl %ebx, %edi
addl $1, %ebx
call do_something(int)
cmpl %ebp, %ebx
jne .L12
.L8:
addq $24, %rsp
popq %rbx
popq %rbp
ret
The second form is better on this compiler.
Why does the compiler emit so many instructions before a function call (see the link below)? As I understand it, it should only need to pass the function parameters before the call.
// c starts at 5. test() increments its own by-value copy of `a` (the caller's
// variable is untouched) and increments the member c.
struct A{
int c = 5;
void test(unsigned int a){
a++;
c++;
}
};
// An int k (initially 2) followed by an A member named a.
struct C{
int k =2;
A a;
};
// An int k (initially 2) followed by a C member named c.
struct D{
int k =2;
C c;
};
// An int k (initially 2) followed by a D member named d.
struct B{
int k =2;
D d;
};
// Free function taking its argument by value; the increment only changes the
// local copy.
void test(unsigned int a){
a++;
}
// Namespace-scope pointers to heap-allocated instances; no matching delete
// appears in this snippet.
B *b = new B();
A *ae = new A();
// Calls test() as a free function, as a member of a local or heap object, and
// as a member reached through a chain of nested members. The comments before
// each group quote the instructions the compiler emitted ahead of the call.
int main()
{
int a = 1;
A ai;
B bi;
C ci;
// Free function: 2 instructions before the call (why not push/pop?)
// movl -36(%rbp), %eax
// movl %eax, %edi
// call test(unsigned int)
test(a);
// Member function on an object: 4 instructions (why 4? is something else passed?)
// movl -36(%rbp), %edx
// leaq -48(%rbp), %rax
// movl %edx, %esi
// movq %rax, %rdi
// call A::test(unsigned int)
ai.test(a);
ae->test(a);
// Member function on a nested member: 5 instructions before the call
// (what is going on here? why the "addq"?)
// movl -36(%rbp), %eax
// leaq -32(%rbp), %rdx
// addq $4, %rdx
// movl %eax, %esi
// movq %rdx, %rdi
// call A::test(unsigned int)
ci.a.test(a);
bi.d.c.a.test(a);
b->d.c.a.test(a);
// No matter how long the member chain is, it always takes 5 instructions.
}
http://goo.gl/smFSA6
Why does calling a class member function take 4 additional instructions to prepare the call? Do we load the object's address into a register as well?
And the last case, with 5 instructions, is just beyond me...
P.S. In the days of my youth, we usually pushed function parameters onto the stack (push) and then read them back (pop). So now we pass parameters through registers?
It's normal. In assembly, a single instruction usually does only one thing. For example, in the last case:
movl -36(%rbp), %eax ; load a into %eax
leaq -32(%rbp), %rdx ; load the address of ci (&ci) into %rdx
addq $4, %rdx ; %rdx = &ci.a, i.e. &ci plus the offset of member a
movl %eax, %esi ; copy a from %eax to %esi (second parameter)
movq %rdx, %rdi ; copy &ci.a from %rdx to %rdi (first parameter: the this pointer)
call A::test(unsigned int) ; call A::test
On 64-bit Linux systems (System V AMD64 ABI), function parameters are no longer passed on the stack by default: the first 6 integer parameters are passed in the %rdi, %rsi, %rdx, %rcx, %r8, %r9 registers, floating-point values use the %xmm0 - %xmm7 registers, and any remaining parameters are passed on the stack.
The local variables of course are located on the stack and accessed through %rbp
I'm starting to try to mess around with inlining ASM in C++, so I wrote up this little snippet:
#include <iostream>
int foo(int, int, int);
// Uses the result of foo(1,2,3) as the program's return value.
int main(void)
{
return foo(1,2,3);
}
/// Returns a + b + c + 1, computed with x86 inline assembly (AT&T syntax).
int foo(int a, int b, int c)
{
    // %0 is `a` (read-write register operand); %1 and %2 are `b` and `c`
    // (input register operands). The adds modify the flags, hence "cc".
    asm volatile("add %1, %0\n\t"
                 "add %2, %0\n\t"
                 "add $0x01, %0"
                 : "+r"(a)
                 : "r"(b), "r"(c)
                 : "cc");
    // Flowing off the end of a non-void function is undefined behaviour, so
    // return the accumulated value explicitly.
    return a;
}
Which outputs the following assembly code:
main:
.LFB969:
subq $40, %rsp
.seh_stackalloc 40
.seh_endprologue
call __main
movl $3, %r8d
movl $2, %edx
movl $1, %ecx
call _Z3fooiii
... stuff not shown...
_Z3fooiii:
.LFB970:
.seh_endprologue
movl %ecx, 8(%rsp)
movl %edx, 16(%rsp)
movl %r8d, 24(%rsp)
movl 16(%rsp), %edx
movl 24(%rsp), %ecx
movl 8(%rsp), %eax
/APP
# 15 "K:\inline_asm_practice_1.cpp" 1
add %edx, %eax
add %ecx, %eax
add $0x01, %eax
# 0 "" 2
/NO_APP
movl %eax, 8(%rsp)
ret
So I can see where it inputs my code, but what's with the stack manipulations above it? Is there any way I can get rid of them; they seem unnecessary. I should just be able to have
(in main)
movl $3, %r8d
movl $2, %edx
movl $1, %ecx
call _Z3fooiii
(in foo)
add %edx, %ecx
add %r8d, %eax
add $0x01, %eax
ret
How do I make gcc understand that it doesn't need to shove things on the stack and bring them back in a different order? I've tried fastcall and regparm already, and I can't find anything about this.
You probably need to enable optimizations via something like -O2 in order to get the compiler to try to write better/faster code, instead of simpler/easier to debug/understand code.
In C++, an object's destructor is called at the closing "}" for the block it was created in, right? So this means that if I have:
while(some_condition)
{
SomeClass some_object;
some_object.someFunction();
some_variable = some_object.some_member;
}
Then the destructor for the object created in one iteration of the loop will be called at the end of the loop before another object is created, correct?
Thanks.
Yes.
But you could have tested it yourself. This is a language feature that compilers are unlikely to get wrong.
#include <iostream>
// Prints one line from its constructor and another from its destructor, so
// each construction and destruction shows up on standard output.
struct S {
S() { std::cout << "S::S()\n"; }
~S() { std::cout << "S::~S()\n"; }
};
// Declares one S inside the loop body. The body executes 10 times: i starts
// at 10 and the post-decrement test stops once i has reached 0.
int main () {
int i = 10;
while(i--) {
S s; // a fresh S is declared in every pass through the body
}
}
The observable behaviour is that it's called each iteration.
The usual rules about optimisations still apply though. If the compiler is smart and the object is simple, then the compiler can do anything it likes that still produces the correct behaviour, e.g.:
#include <iostream>
// The constructor sets i to -1; the destructor writes 1 into i, a value that
// nothing in this example reads afterwards.
struct foo {
int i;
foo() : i (-1) {}
~foo() { i = 1; }
};
// Prints f.i once per iteration. With the pre-decrement test and i starting
// at 10, the body runs 9 times.
int main() {
int i = 10;
while (--i) {
foo f;
std::cout << f.i;
}
}
Compiles to:
.Ltmp5:
.cfi_def_cfa_register %rbp
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
movl $_ZSt4cout, %edi
movl $-1, %esi
callq _ZNSolsEi
xorl %eax, %eax
popq %rbp
ret
I.e. unrolled and no sign of that destructor in there (although the observable behaviour is still the same).
I have to write some member functions of a class in Assembly for an exam. I've followed every instruction but I still can't get it to work. Here are the relevant files. The header and the main method are already provided, I just need to write the constructor and the elab1 method.
Class header
#include <iostream>
using namespace std;
// Plain aggregate: one int, an array of four ints and an array of four doubles.
struct st { int a; int vv1[4]; double vv2[4]; };
// Holds a double b and an st s. The constructor and elab1 are only declared
// here; stampa() is defined inline and prints the current state.
class cl
{ double b; st s;
public:
cl(int *p, double *d);
void elab1(st ss);
// Prints b and s.a on one line, then the four vv1 entries, a tab, and the
// four vv2 entries on the next line, followed by an empty line.
void stampa()
{ int i; cout << b << ' ' << s.a << endl;
// Only the first statement after each for(...) is the loop body; the '\t'
// and the endl that follow on the same line are written once, after the loop.
for (i=0;i<4;i++) cout << s.vv1[i] << ' '; cout << '\t';
for (i=0;i<4;i++) cout << s.vv2[i] << ' '; cout << endl;
cout << endl;
}
};
Main method for testing
// prova1.cpp
#include "cc.h" // class header
// Test driver: builds a cl from v and d, prints it, calls elab1 with s, and
// prints it again.
int main()
{
// Brace elision: a = 1, vv1 = {1,2,3,4}, vv2 = {1,2,3,4}.
st s = {1, 1,2,3,4, 1,2,3,4 };
int v[4] = {10,11,12,13 };
double d[4] = { 2, 3, 4, 5 };
cl cc1(v, d);
cc1.stampa();
cc1.elab1(s);
cc1.stampa();
}
And this is my assembly:
# es1.s
.text
# Hand-written constructor body, 32-bit x86 in AT&T syntax.
# presumably 8(%ebp) = this, 12(%ebp) = p, 16(%ebp) = d (stack-passed arguments) - TODO confirm
# NOTE(review): the trailing "Pe" of the symbol encodes a `long double*` parameter, whereas
# the class header declares `double*`; check the mangled name with c++filt.
.global __ZN2clC1EPiPe
__ZN2clC1EPiPe:
pushl %ebp
movl %esp, %ebp
# reserve 4 bytes for a local: the loop counter kept at -4(%ebp)
subl $4, %esp
# save the registers this routine overwrites
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
pushl %esi
# skip all the work if either of the two pointer arguments is zero
cmpl $0, 12(%ebp)
je fine
cmpl $0, 16(%ebp)
je fine
movl 8(%ebp), %eax
movl 12(%ebp), %ebx
# copy the 32-bit word at offset 4 from %ebx to offset 12 from %eax
movl 4(%ebx), %ecx
movl %ecx, 12(%eax)
# store 0.0 as a 64-bit floating-point value at offset 0 from %eax
fldz
fstpl (%eax)
# loop counter = 0
movl $0, -4(%ebp)
ciclo:
# leave the loop once the counter reaches 4
cmpl $4, -4(%ebp)
je fine
movl -4(%ebp), %esi
movl 12(%ebp), %ebx
# load the 32-bit word at %ebx + 4*counter, subtract the counter,
# and store the result at %eax + 16 + 4*counter
movl (%ebx, %esi, 4), %ecx
subl %esi, %ecx
movl %ecx, 16(%eax, %esi, 4)
movl 16(%ebp), %ebx
# %edx = 3 * counter; one-operand imull writes %edx:%eax, so %eax is saved around it
pushl %eax
movl %esi, %eax
movl $3, %ecx
imull %ecx
movl %eax, %edx
popl %eax
movl 12(%ebp), %ecx
# add the 64-bit values at (%ebx + 4*%edx) and (%ecx + 4*%esi),
# then store the sum at %eax + 32 + 4*%edx
# NOTE(review): fldl reads a 64-bit floating-point operand; confirm that is intended for
# the load through %ecx, which was taken from 12(%ebp) (presumably the int* argument)
fldl (%ebx, %edx, 4)
fldl (%ecx, %esi, 4)
faddp %st, %st(1)
fstpl 32(%eax, %edx, 4)
# add the 64-bit value at (%ebx + 4*%edx) to the one stored at offset 0 from %eax
fldl (%ebx, %edx, 4)
fldl (%eax)
faddp %st, %st(1)
fstpl (%eax)
incl -4(%ebp)
jmp ciclo
fine:
# restore the saved registers in the reverse order of the pushes
popl %esi
popl %edx
popl %ecx
popl %ebx
popl %eax
# return with the first stack argument in %eax
movl 8(%ebp), %eax
leave
ret
.global __ZN2cl5elab1E2st
__ZN2cl5elab1E2st: #TODO
I try to compile and link everything with the command-line statement that has been provided to us:
g++ -o es1 -fno-elide-constructors es1.s prova1.cpp
but it only gives me a bunch of undefined references:
/tmp/ccbwS0uN.o: In function `main':
prova1.cpp:(.text+0xee): undefined reference to `cl::cl(int*, double*)'
prova1.cpp:(.text+0x192): undefined reference to `cl::elab1(st)'
collect2: ld returned 1 exit status
Do you have any idea how can I solve this issue? I thought that probably I might have translated the function names in the wrong way, but I've checked them several times.
Apply c++filt to your name mangling and compare to the signature in the error message.
When removing one underscore and filtering with c++filt, I get for your mangled name cl::cl(int*, long double*) which does not match any in your error message/class declaration.
The correctly mangled name should be _ZN2clC1EPiPd for cl::cl(int*, double*).
I suggest that you improve the way (whatever it is) you obtain the mangled name.