I want to use PerformanceCounter to measure how much time I need for some operation.
I don't know much about PerformanceCounter and C++ in general. I found some code here:
How to use QueryPerformanceCounter?
I'm getting weird results with this. Here is my try:
#include <Windows.h>
// ...
// Performance-counter ticks per millisecond; assigned by StartCounter().
double PCFreq = 0.0;
// Raw performance-counter value captured by StartCounter().
__int64 CounterStart = 0;
void StartCounter()
{
LARGE_INTEGER li;
if (!QueryPerformanceFrequency(&li))
printf("QueryPerformanceFrequency failed!\n");
PCFreq = double(li.QuadPart) / 1000.0;
//printf("Performance counter resolution: %f", PCFreq);
QueryPerformanceCounter(&li);
CounterStart = li.QuadPart;
}
double GetCounter()
{
LARGE_INTEGER li;
QueryPerformanceCounter(&li);
return double(li.QuadPart - CounterStart) / PCFreq;
}
// Test driver: repeatedly times a one-second Sleep() and prints the measured
// duration in milliseconds. Runs forever.
int main(int argc, const char** argv) {
    while (true) {
        StartCounter();
        Sleep(1000); // just a test
        // GetCounter() returns a double, so it must be printed with %f.
        // Using %d (an int specifier) is undefined behaviour and produced
        // the garbage/negative numbers.
        printf("Query frame: %f\n", GetCounter());
        // ...
    }
}
And here is my weird result with negative numbers:
What is wrong with my code?
You are passing a double to printf with the %d format specifier, which expects an int; use %f instead:
printf("Query frame: %f\n", GetCounter());
Related
When I call QueryPerformanceCounter many times in a short time, it gives me wrong output. The code works for a number of loop iterations, and then at some point two consecutive QueryPerformanceCounter calls return values that are far apart.
Code:
#include <stdio.h>
#include <windows.h>
#include <stdlib.h>
#include <math.h>
//timer (us)
// Performance-counter ticks per microsecond; assigned by StartCounter().
double PCFreq = 0.0;
// Raw performance-counter value captured by StartCounter().
__int64 CounterStart = 0;
//prototypes
double micros(void);
void StartCounter(void);
// Takes two back-to-back micros() readings per iteration and stops at the
// first iteration where they differ by more than 1000 us, printing both
// readings and the iteration index.
int main()
{
    StartCounter();
    //initialisation time
    printf("go\n");

    long i = 0;
    while (i < 100000000) {
        const double first = micros();
        const double second = micros();
        const double gap = fabs(first - second);
        if (gap > 1000) {
            printf("time_stamp1 : %lf\n", first);
            printf("time_stamp2 : %lf\n", second);
            printf("i : %ld\n", i);
            break;
        }
        i += 1;
    }

    printf("finish\n");
}
void StartCounter() {
LARGE_INTEGER li;
if (!QueryPerformanceFrequency(&li)) {
printf("not initialized");
}
PCFreq = double(li.QuadPart) / 1000000.0;
QueryPerformanceCounter(&li);
CounterStart = li.QuadPart;
}
double micros() {
LARGE_INTEGER li;
QueryPerformanceCounter(&li);
return double(li.QuadPart - CounterStart) / PCFreq;
}
Unexpected output:
go
time_stamp1 : 709840.300000
time_stamp2 : 712229.000000
i : 5524494
finish
This question already has answers here:
What is an undefined reference/unresolved external symbol error and how do I fix it?
(39 answers)
Closed 6 years ago.
I tried creating a Stopwatch struct whose purpose is to measure time using the Windows QueryPerformanceCounter() function, and if it does not exist or is not supported, it falls back to using clock() in time.h.
Unfortunately a Linker error is taking place as follows:
Error LNK2001: unresolved external symbol "private: static struct Stopwatch::StopwatchInitializer Stopwatch::platformInfo" (?platformInfo@Stopwatch@@0UStopwatchInitializer@1@A) in Main.obj
When I change platformInfo to non-static, the application works, but if marked static, it doesn't.
My intention is to leave it static so that it is initialized only once, but the Linker is not cooperating.
Why is that? Here is my code:
StdAfx.h:
#pragma once
#include "time.h"
#ifdef _WIN32
#include "windows.h"
#endif
//A stopwatch function provider that offers clock tick accuracy
struct Stopwatch {
    // Accumulating stopwatch. Time is tracked in platform-specific ticks:
    // QueryPerformanceCounter ticks on Windows when the high-resolution
    // counter is available, clock() ticks otherwise. The getElapsed*()
    // accessors convert ticks to real-time units via platformInfo.
private:
    //holds the platform data required to initialize the stopwatch
    struct StopwatchInitializer {
        double nanosecondsPerTick;  // length of one tick in nanoseconds
        bool isHighResolution;      // true when QueryPerformanceFrequency succeeded
        StopwatchInitializer() {
#ifdef _WIN32
            LARGE_INTEGER value;
            isHighResolution = !!QueryPerformanceFrequency(&value);
            nanosecondsPerTick = isHighResolution ? 1000000000.0 / value.QuadPart : 1000000000.0 / CLOCKS_PER_SEC;
#else
            // Was "#elif" with no condition, which is a preprocessor error
            // whenever this branch is actually evaluated (non-Windows builds).
            isHighResolution = false;
            nanosecondsPerTick = 1000000000.0 / CLOCKS_PER_SEC;
#endif
        }
    };
    //static single instance of the StopwatchInitializer instance
    //NOTE: this is only a declaration. Exactly one source file must also
    //define it at namespace scope, otherwise the linker reports an
    //unresolved external symbol:
    //    const Stopwatch::StopwatchInitializer Stopwatch::platformInfo;
    static const StopwatchInitializer platformInfo;
    double startTimeStamp, elapsed;  // both in platform-specific ticks
    bool isRunning;
public:
    //Initializes the stopwatch.
    Stopwatch() {
        elapsed = 0.0;
        startTimeStamp = 0.0;
        isRunning = false;
    }
    //Initializes the stopwatch with the specified pre-elapsed ticks.
    Stopwatch(double preElapsedTicks) {
        elapsed = preElapsedTicks;
        startTimeStamp = 0.0;
        isRunning = false;
    }
    //Gets the current time in platform-specific ticks.
    double getTimeStamp() const {
#ifdef _WIN32
        if (platformInfo.isHighResolution) {
            LARGE_INTEGER num;
            QueryPerformanceCounter(&num);
            return (double) num.QuadPart;
        } else
            return (double) clock();
#else
        return (double) clock();
#endif
    }
    //Starts or resumes the stopwatch. Does nothing if already running.
    void start() {
        if (isRunning)
            return;
        isRunning = true;
        startTimeStamp = getTimeStamp();
    }
    //Stops the stopwatch and adds the ticks since start() to the total.
    void stop() {
        if (isRunning) {
            isRunning = false;
            elapsed += getTimeStamp() - startTimeStamp;
        }
    }
    //Gets whether the stopwatch is running
    int getIsRunning() const {
        return isRunning;
    }
    //Sets whether the stopwatch is running (boolean value).
    void setIsRunning(bool value) {
        if (value)
            start();
        else
            stop();
    }
    //Gets the current elapsed ticks (includes the in-progress interval when running).
    double getElapsedTicks() const {
        return isRunning ? (elapsed - startTimeStamp) + getTimeStamp() : elapsed;
    }
    //Gets the elapsed time in fortnights.
    double getElapsedFortnights() const {
        return (platformInfo.nanosecondsPerTick * 0.00000000000000082671957671957672) * getElapsedTicks();
    }
    //Gets the elapsed time in weeks.
    double getElapsedWeeks() const {
        return (platformInfo.nanosecondsPerTick * 0.00000000000000165343915343915344) * getElapsedTicks();
    }
    //Gets the elapsed time in days.
    double getElapsedDays() const {
        return (platformInfo.nanosecondsPerTick * 0.00000000000001157407407407407407) * getElapsedTicks();
    }
    //Gets the elapsed time in hours.
    double getElapsedHours() const {
        return (platformInfo.nanosecondsPerTick * 0.00000000000027777777777777777777) * getElapsedTicks();
    }
    //Gets the elapsed time in minutes.
    double getElapsedMinutes() const {
        return (platformInfo.nanosecondsPerTick * 0.00000000001666666666666666666666) * getElapsedTicks();
    }
    //Gets the elapsed time in seconds.
    double getElapsedSeconds() const {
        return (platformInfo.nanosecondsPerTick * 0.000000001) * getElapsedTicks();
    }
    //Gets the elapsed time in milliseconds.
    double getElapsedMilliseconds() const {
        return (platformInfo.nanosecondsPerTick * 0.000001) * getElapsedTicks();
    }
    //Gets the elapsed time in microseconds.
    double getElapsedMicroseconds() const {
        return (platformInfo.nanosecondsPerTick * 0.001) * getElapsedTicks();
    }
    //Gets the elapsed time in nanoseconds.
    double getElapsedNanoseconds() const {
        return platformInfo.nanosecondsPerTick * getElapsedTicks();
    }
    //Resets the elapsed time and stops the stopwatch.
    void reset() {
        elapsed = 0.0;
        isRunning = false;
        startTimeStamp = 0.0;
    }
    //Resets the elapsed time to the specified platform-specific ticks and stops the stopwatch.
    void reset(double ticks) {
        elapsed = ticks;
        isRunning = false;
        startTimeStamp = 0.0;
    }
    //Resets the stopwatch and restarts it.
    void restart() {
        elapsed = 0.0;
        isRunning = true;
        startTimeStamp = getTimeStamp();
    }
    //Resets the stopwatch to specified platform-specific ticks and restarts it.
    void restart(double ticks) {
        elapsed = ticks;
        isRunning = true;
        startTimeStamp = getTimeStamp();
    }
    //Converts nanoseconds to platform-specific ticks.
    double convertToTicks(double nanoseconds) const {
        return nanoseconds / platformInfo.nanosecondsPerTick;
    }
    //Converts platform-specific ticks to nanoseconds.
    double convertToNanoseconds(double ticks) const {
        return ticks * platformInfo.nanosecondsPerTick;
    }
};
Main.h:
#include "stdafx.h"
#include "stdio.h"
//entry point
// Times 100,000,000 floating-point divisions and then the same number of
// multiplications, printing the stopwatch reading (in milliseconds) and the
// final value after each loop.
int main() {
    const int iterations = 100000000;
    Stopwatch stopwatch;
    double value = 100002030.0;

    stopwatch.start();
    for (int i = 0; i < iterations; i++)
        value /= 2.2;
    printf("Division: %fms, value: %f", stopwatch.getElapsedMilliseconds(), value);

    value = 100002030.0;
    stopwatch.restart();
    for (int i = 0; i < iterations; i++)
        value *= 0.45454545454;
    printf("\nMultiplication: %fms, value: %f", stopwatch.getElapsedMilliseconds(), value);

    scanf_s("");
}
You declared static member platformInfo, but didn't define it, so the linker complains about it. To define this static member outside of the class, add this line at top-level of your source:
// Out-of-class definition of the static data member Stopwatch::platformInfo;
// it is default-constructed, which runs StopwatchInitializer's constructor.
const Stopwatch::StopwatchInitializer Stopwatch::platformInfo;
/* Prototype of QueryPerformanceCounter: the counter value is written through
 * lpPerformanceCount (an output parameter, per the __out annotation) and the
 * BOOL result reports success or failure. */
BOOL QueryPerformanceCounter(
__out LARGE_INTEGER *lpPerformanceCount
);
LARGE_IN
TEGER startTimer()
{
LARGE_INTEGER start;
DWORD_PTR oldmask = SetThreadAffinityMask(GetCurrentThread(), 0);
QueryPerformanceCounter(&start);
SetThreadAffinityMask(GetCurrentThread(), oldmask);
return
start;
}
/*
 * Samples the performance counter at the end of a timed region.
 *
 * The thread is temporarily pinned to the first processor while the counter
 * is read. Returns the raw counter value; divide a difference of two samples
 * by QueryPerformanceFrequency() to obtain seconds.
 */
LARGE_INTEGER endTimer()
{
    LARGE_INTEGER stop;
    /* An affinity mask of 0 is invalid: SetThreadAffinityMask would fail and
     * return 0, so nothing was pinned and the "restore" call failed as well.
     * Use mask 1 (first processor) instead. */
    DWORD_PTR oldmask = SetThreadAffinityMask(GetCurrentThread(), 1);
    QueryPerformanceCounter(&stop);
    /* Restore the previous affinity only if pinning actually succeeded. */
    if (oldmask != 0)
        SetThreadAffinityMask(GetCurrentThread(), oldmask);
    return stop;
}
I'm using those functions but I'm not sure in what type it returns values.
endTimer - startTimer = ? How to convert this result to get seconds ?
You need to get frequency and divide your 2 counters difference by it.
/* fr: counter frequency; t1/t2: counter samples before and after the timed code. */
LARGE_INTEGER fr,t1,t2;
QueryPerformanceCounter(&t1);
// some lengthy code ...
QueryPerformanceCounter(&t2);
QueryPerformanceFrequency(&fr);
/* Tick difference divided by the frequency; the (double) cast forces
 * floating-point division so the fractional part is kept. */
double diff_sec = (t2.QuadPart-t1.QuadPart)/(double)fr.QuadPart;
I've tried this code in C++ on Win7x64 platform with MSVC++, and I got CPU frequency about 2900000 ticks per second.
When I run this program, my stopwatch returns about 10,000,000 ticks, which would mean it takes about 4 seconds to run my program, but the program's results are ready in 1 second (or less) O_o.
Could you please tell me what is wrong in my code?
#include <iostream>
#include "header.h"
#include <fstream>
#include <string>
#include <sstream>
#include <strsafe.h>
#include <direct.h>
#include <string.h>
using namespace std;
// Scale factors; none of them is used in the visible code.
// NOTE(review): presumably multipliers for converting seconds to
// nanoseconds / microseconds / milliseconds -- confirm against callers.
#define CV_TO_NANO 1000000000
#define CV_TO_MICRO 1000000
#define CV_TO_MILLI 1000
// Reads the CPU time-stamp counter with inline assembly: CPUID is executed
// first (eax cleared beforehand), then RDTSC leaves the counter in edx:eax.
// There is no explicit return statement.
// NOTE(review): presumably relies on the compiler returning edx:eax as the
// 64-bit result, and on __asm blocks being supported for the target
// (32-bit x86 MSVC) -- confirm.
unsigned __int64 inline GetRDTSC()
{
__asm
{
; Flush the pipeline
XOR eax, eax
CPUID
; Get RDTSC counter in edx:eax
RDTSC
}
}
// Forward declaration; RunTest is defined after main() and called from it.
unsigned __int64 RunTest(TCHAR *AppName, TCHAR *CmdLine);
// Prints the performance-counter frequency and its reciprocal, then runs
// RunTest() 200 times with the process and thread pinned to the first
// processor, printing each returned tick count.
//
// Changes from the original: main returns int (void main is non-standard),
// and the unused start/stop locals were removed.
int main()
{
    unsigned __int64 freq = 0;
    float rps;
    ofstream dataFile;
    // Frequency of the high-resolution performance counter. This is NOT the
    // processor clock speed and is not comparable with RDTSC readings.
    QueryPerformanceFrequency((LARGE_INTEGER *)&freq);
    cout <<"freq (count per second): "<< freq << endl;
    // seconds per counter tick
    rps = 1.0/(freq);
    cout <<"rps (1/rps): "<< rps << endl;
    dataFile.open ("d:/dataC.txt",ios::out );
    for(int i = 0;i<200;i++)
    {
        SetProcessAffinityMask(GetCurrentProcess(),0x0001);
        SetThreadAffinityMask(GetCurrentThread(),0x0001);
        cout << RunTest(L"D:\\Child\\Child.exe", NULL);
    }
    getchar();
    return 0;
}
// Launches AppName with CmdLine via CreateProcess (REALTIME_PRIORITY_CLASS)
// and returns the difference between the GetRDTSC() readings taken
// immediately before and after the CreateProcess call. Blocks on getchar()
// afterwards; on success the child's process and thread handles are closed.
unsigned __int64 RunTest(TCHAR *AppName, TCHAR *CmdLine)
{
    PROCESS_INFORMATION procInfo;
    STARTUPINFO startInfo;
    memset(&procInfo, 0, sizeof(procInfo));
    memset(&startInfo, 0, sizeof(startInfo));
    startInfo.cb = sizeof(startInfo);

    const unsigned __int64 before = GetRDTSC();
    const BOOL created = ::CreateProcess(AppName, CmdLine, NULL, NULL, FALSE, REALTIME_PRIORITY_CLASS, NULL, NULL, &startInfo, &procInfo);
    const unsigned __int64 after = GetRDTSC();

    getchar();

    if (created != 0)
    {
        // Timeout of 0: polls the process handle without blocking.
        WaitForSingleObject( procInfo.hProcess, 0 );
        CloseHandle( procInfo.hProcess );
        CloseHandle( procInfo.hThread );
    }
    else
    {
        wprintf(L"ERROR: CreateProcess failed!");
    }

    return after - before;
}
I think you have a misconception here that QueryPerformanceFrequency is telling you something about the speed of your processor - it isn't. QueryPerformanceFrequency retrieves the frequency of the high-resolution performance counter, which is not guaranteed to have any predictable relationship to your CPU clock speed. This value needs to be used in conjunction with QueryPerformanceCounter in order to get quality timing values, not with assembly that directly queries the RDTSC.
Here is an example of how to use the high-frequency timer to time a block of code:
#include <Windows.h>
#include <iostream>
using namespace std;
int main()
{
LARGE_INTEGER li = {};
__int64 freq, start, stop;
QueryPerformanceFrequency(&li);
freq = li.QuadPart;
cout << "Counter Frequency: " << freq << "\n";
QueryPerformanceCounter(&li);
start = li.QuadPart;
for( int i = 0; i < 1000000; ++i )
{
int n = i * rand();
}
QueryPerformanceCounter(&li);
stop = li.QuadPart;
double elapsed_seconds = static_cast<double>(stop-start) / static_cast<double>(freq);
cout << "Elapsed Time: " << elapsed_seconds << " seconds\n";
}
I have the following code running on QNX Momentics.
/* Nanoseconds per second. The macro must not end with a semicolon, otherwise
 * it can only be used as the last token of a statement and breaks inside any
 * larger expression. */
#define BILLION 1000000000L
/* Time stamp taken when the measured function starts. */
struct timespec start_time;
/* Time stamp taken when the elapsed time is computed. */
struct timespec stop_time;
void start MyTestFunc() {
//Initialize the Test Start time
clock_gettime(CLOCK_REALTIME,&start_time)
// ... additonal code.
cout << "The exectuion time of func "<< calculateExecutionTime();
}
// Stores the current CLOCK_REALTIME reading in stop_time and returns the time
// elapsed since start_time, in seconds (whole seconds plus the nanosecond
// difference scaled by BILLION).
double calculateExecutionTime ()
{
    clock_gettime(CLOCK_REALTIME, &stop_time);

    const double wholeSeconds = (stop_time.tv_sec - start_time.tv_sec);
    const double fraction = (double)( stop_time.tv_nsec - start_time.tv_nsec ) / BILLION;

    return wholeSeconds + fraction;
}
Now I want to port the above code to Windows. Can anyone provide sample code?
Thanks!
You can implement a clock_gettime() replacement for windows as follows:
/*
 * Returns the FILETIME value of 1970-01-01 00:00:00.000 as a 64-bit integer,
 * obtained by converting that date with SystemTimeToFileTime().
 */
LARGE_INTEGER
getFILETIMEoffset()
{
    SYSTEMTIME epoch;
    FILETIME epochFileTime;
    LARGE_INTEGER result;

    /* Midnight, 1 January 1970. */
    epoch.wYear = 1970;
    epoch.wMonth = 1;
    epoch.wDay = 1;
    epoch.wHour = 0;
    epoch.wMinute = 0;
    epoch.wSecond = 0;
    epoch.wMilliseconds = 0;
    SystemTimeToFileTime(&epoch, &epochFileTime);

    /* Combine the two 32-bit halves into one 64-bit value. */
    result.QuadPart = epochFileTime.dwHighDateTime;
    result.QuadPart = (result.QuadPart << 32) | epochFileTime.dwLowDateTime;
    return (result);
}
/*
 * clock_gettime() replacement that fills a struct timeval (seconds and
 * microseconds). The clock id X is ignored.
 *
 * On the first call it decides which time source to use:
 *   - the performance counter, if QueryPerformanceFrequency() succeeds; the
 *     offset is then the counter value at that first call;
 *   - otherwise the system FILETIME clock (10 ticks per microsecond), offset
 *     by the value returned by getFILETIMEoffset().
 * Always returns 0.
 */
int
clock_gettime(int X, struct timeval *tv)
{
    static LARGE_INTEGER offset;
    static double frequencyToMicroseconds;
    static int initialized = 0;
    static BOOL usePerformanceCounter = 0;
    LARGE_INTEGER now;
    double microseconds;

    if (initialized == 0) {
        LARGE_INTEGER performanceFrequency;
        initialized = 1;
        usePerformanceCounter = QueryPerformanceFrequency(&performanceFrequency);
        if (!usePerformanceCounter) {
            offset = getFILETIMEoffset();
            frequencyToMicroseconds = 10.;
        } else {
            QueryPerformanceCounter(&offset);
            frequencyToMicroseconds = (double)performanceFrequency.QuadPart / 1000000.;
        }
    }

    if (!usePerformanceCounter) {
        FILETIME fileTime;
        GetSystemTimeAsFileTime(&fileTime);
        now.QuadPart = fileTime.dwHighDateTime;
        now.QuadPart <<= 32;
        now.QuadPart |= fileTime.dwLowDateTime;
    } else {
        QueryPerformanceCounter(&now);
    }

    /* Ticks since the offset, converted to whole microseconds. */
    now.QuadPart -= offset.QuadPart;
    microseconds = (double)now.QuadPart / frequencyToMicroseconds;
    now.QuadPart = microseconds;

    tv->tv_sec = now.QuadPart / 1000000;
    tv->tv_usec = now.QuadPart % 1000000;
    return (0);
}
Avoiding PerformanceCounter mess, simple code:
struct timespec { long tv_sec; long tv_nsec; }; //header part
int clock_gettime(int, struct timespec *spec) //C-file part
{ __int64 wintime; GetSystemTimeAsFileTime((FILETIME*)&wintime);
wintime -=116444736000000000i64; //1jan1601 to 1jan1970
spec->tv_sec =wintime / 10000000i64; //seconds
spec->tv_nsec =wintime % 10000000i64 *100; //nano-seconds
return 0;
}
...is fast, reliable and correct porting solution with impressive 100ns precision (1ms/10000).
And here is a QPC-based solution, whose precision may be even better on some hardware:
// Seconds / nanoseconds pair filled in by unix_time() and clock_gettime().
struct timespec { long tv_sec; long tv_nsec; }; //header part
// 64-bit integer constants (i64 suffix) used by the conversions below.
#define exp7 10000000i64 //1E+7 //C-file part
#define exp9 1000000000i64 //1E+9
#define w2ux 116444736000000000i64 //1.jan1601 to 1.jan1970
void unix_time(struct timespec *spec)
{ __int64 wintime; GetSystemTimeAsFileTime((FILETIME*)&wintime);
wintime -=w2ux; spec->tv_sec =wintime / exp7;
spec->tv_nsec =wintime % exp7 *100;
}
int clock_gettime(int, timespec *spec)
{ static struct timespec startspec; static double ticks2nano;
static __int64 startticks, tps =0; __int64 tmp, curticks;
QueryPerformanceFrequency((LARGE_INTEGER*)&tmp); //some strange system can
if (tps !=tmp) { tps =tmp; //init ~~ONCE //possibly change freq ?
QueryPerformanceCounter((LARGE_INTEGER*)&startticks);
unix_time(&startspec); ticks2nano =(double)exp9 / tps; }
QueryPerformanceCounter((LARGE_INTEGER*)&curticks); curticks -=startticks;
spec->tv_sec =startspec.tv_sec + (curticks / tps);
spec->tv_nsec =startspec.tv_nsec + (double)(curticks % tps) * ticks2nano;
if (!(spec->tv_nsec < exp9)) { spec->tv_sec++; spec->tv_nsec -=exp9; }
return 0;
}
My improved version of clock_gettime() using QueryPerformanceCounter().
/* Nanoseconds per second, as a floating-point constant. */
#define BILLION (1E9)
/* Non-zero until clock_gettime() has queried the counter frequency. */
static BOOL g_first_time = 1;
/* Performance-counter ticks per second; set to 0 if the query fails. */
static LARGE_INTEGER g_counts_per_sec;
/*
 * Fills *ct with the performance-counter reading split into seconds and
 * nanoseconds. The clock id (dummy) is ignored.
 *
 * The counter frequency is queried once, on the first call.
 * Returns 0 on success, or -1 if ct is NULL, the frequency is unavailable,
 * or QueryPerformanceCounter() fails.
 */
int clock_gettime(int dummy, struct timespec *ct)
{
    LARGE_INTEGER now;

    if (g_first_time)
    {
        g_first_time = 0;
        if (!QueryPerformanceFrequency(&g_counts_per_sec))
        {
            g_counts_per_sec.QuadPart = 0;
        }
    }

    /* Guards are evaluated in the same order as a short-circuit OR. */
    if (ct == NULL)
    {
        return -1;
    }
    if (g_counts_per_sec.QuadPart <= 0)
    {
        return -1;
    }
    if (!QueryPerformanceCounter(&now))
    {
        return -1;
    }

    ct->tv_sec = now.QuadPart / g_counts_per_sec.QuadPart;
    ct->tv_nsec = ((now.QuadPart % g_counts_per_sec.QuadPart) * BILLION) / g_counts_per_sec.QuadPart;
    return 0;
}
I think my version is an improvement over the currently accepted answer using QueryPerformanceCounter(), because -
More robust - checks return values of functions, also value returned in pass-by-reference variable.
More robust - checks validity of input parameter.
More streamlined - Uses as few as necessary number of variables (3 vs 7).
More streamlined - Avoids the code-path involving GetSystemTimeAsFileTime() since QueryPerformanceFrequency() and QueryPerformanceCounter() are guaranteed to work on systems that run Windows XP or later.
A full-featured and fully-tested implementation of clock_gettime() has been in mingw-w64 for many years now. You'll have to use a toolchain with mingw64/msys2 to use this, with header #include <time.h> (on Windows). If you're writing a codebase that's portable between Linux and Windows, and you can't find clock_gettime() in <time.h> for your Linux builds, I'd suggest you try #include <pthread_time.h>, compiling with -pthread, or linking with -lrt.
See also question 60020968 for Windows builds; and 33846055, 538609 for your Linux builds.
I needed monotonic and realtime.
For monotonic, I just take the perf counter since a wall clock baseline is meaningless.
/* Unit-conversion factors, all unsigned long long so products stay 64-bit. */
#define MS_PER_SEC 1000ULL // MS = milliseconds
#define US_PER_MS 1000ULL // US = microseconds
#define HNS_PER_US 10ULL // HNS = hundred-nanoseconds (e.g., 1 hns = 100 ns)
#define NS_PER_US 1000ULL
/* Derived: 10,000,000 hundred-nanosecond intervals per second. */
#define HNS_PER_SEC (MS_PER_SEC * US_PER_MS * HNS_PER_US)
#define NS_PER_HNS (100ULL) // NS = nanoseconds
/* Derived: 1,000,000,000 nanoseconds per second. */
#define NS_PER_SEC (MS_PER_SEC * US_PER_MS * NS_PER_US)
/*
 * Fills *tv with the performance-counter reading split into seconds and
 * nanoseconds. The counter frequency is cached in a static on first use.
 *
 * Returns 0 on success. If the frequency is still 0 after querying it,
 * sets errno to ENOTSUP and returns -1.
 */
int clock_gettime_monotonic(struct timespec *tv)
{
    static LARGE_INTEGER ticksPerSec;
    LARGE_INTEGER now;

    if (ticksPerSec.QuadPart == 0) {
        QueryPerformanceFrequency(&ticksPerSec);
        if (ticksPerSec.QuadPart == 0) {
            errno = ENOTSUP;
            return -1;
        }
    }

    QueryPerformanceCounter(&now);

    /* Whole seconds first, then the sub-second remainder scaled to ns. */
    tv->tv_sec = (long)(now.QuadPart / ticksPerSec.QuadPart);
    tv->tv_nsec = (long)(((now.QuadPart % ticksPerSec.QuadPart) * NS_PER_SEC) / ticksPerSec.QuadPart);
    return 0;
}
and wall clock, based on GMT unlike the tempting and similar _ftime() function.
/*
 * Fills *tv with the current wall-clock time relative to the POSIX epoch
 * (1 Jan 1970), read via GetSystemTimePreciseAsFileTime().
 *
 * Always returns 0.
 */
int clock_gettime_realtime(struct timespec *tv)
{
    FILETIME ft;
    ULARGE_INTEGER hnsTime;
    GetSystemTimePreciseAsFileTime(&ft);
    hnsTime.LowPart = ft.dwLowDateTime;
    hnsTime.HighPart = ft.dwHighDateTime;
    // To get POSIX Epoch as baseline, subtract the number of hns intervals from Jan 1, 1601 to Jan 1, 1970.
    hnsTime.QuadPart -= (11644473600ULL * HNS_PER_SEC);
    // modulus by hns intervals per second first, then convert to ns, as not to lose resolution
    tv->tv_nsec = (long) ((hnsTime.QuadPart % HNS_PER_SEC) * NS_PER_HNS);
    // Cast to time_t, not long: long is 32 bits on Windows, so a (long) cast
    // would truncate the seconds count after January 2038.
    tv->tv_sec = (time_t) (hnsTime.QuadPart / HNS_PER_SEC);
    return 0;
}
And then the POSIX compatible function... see POSIX header for typedef and macros.
/*
 * POSIX-style entry point: dispatches to clock_gettime_monotonic() for
 * CLOCK_MONOTONIC and to clock_gettime_realtime() for CLOCK_REALTIME.
 *
 * Returns the chosen helper's result. For any other clock id, sets errno to
 * ENOTSUP and returns -1.
 */
int clock_gettime(clockid_t type, struct timespec *tp)
{
    const int wants_monotonic = (type == CLOCK_MONOTONIC);
    const int wants_realtime = (type == CLOCK_REALTIME);

    if (!wants_monotonic && !wants_realtime)
    {
        errno = ENOTSUP;
        return -1;
    }

    return wants_monotonic ? clock_gettime_monotonic(tp)
                           : clock_gettime_realtime(tp);
}
You can use timespec_get to implement simple clock_gettime.
(timespec_get function is available since C11)
/*
 * Minimal clock_gettime() built on C11 timespec_get().
 *
 * clk_id is ignored; the time is always TIME_UTC based.
 * Returns 0 on success and -1 on failure. timespec_get() itself returns the
 * (non-zero) time base on success and 0 on failure, so its result must be
 * translated rather than returned directly.
 */
int clock_gettime(int clk_id, struct timespec *tv)
{
    (void)clk_id;
    return (timespec_get(tv, TIME_UTC) == TIME_UTC) ? 0 : -1;
}
... but the resulting timespec has only about 10 millisecond resolution on my Windows 7 64-bit machine. :(
Here is my version of clock_gettime.
int clock_gettime(int, struct timespec *tv)
{
static int initialized = 0;
static LARGE_INTEGER freq, startCount;
static struct timespec tv_start;
LARGE_INTEGER curCount;
time_t sec_part;
long nsec_part;
if (!initialized) {
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&startCount);
timespec_get(&tv_start, TIME_UTC);
initialized = 1;
}
QueryPerformanceCounter(&curCount);
curCount.QuadPart -= startCount.QuadPart;
sec_part = curCount.QuadPart / freq.QuadPart;
nsec_part = (long)((curCount.QuadPart - (sec_part * freq.QuadPart))
* 1000000000UL / freq.QuadPart);
tv->tv_sec = tv_start.tv_sec + sec_part;
tv->tv_nsec = tv_start.tv_nsec + nsec_part;
if(tv->tv_nsec >= 1000000000UL) {
tv->tv_sec += 1;
tv->tv_nsec -= 1000000000UL;
}
return 0;
}