Calculating CPU usage per core by WinAPI

Calculating CPU usage per core by WinAPI - c++

I am trying to calculate CPU usage per core using C++ and WinAPI, but I am not able to do that properly. Thus, I have managed to calculate it by following:
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
#include <windows.h>
#define SystemProcessorPerformanceInformation 0x8
#define SystemBasicInformation 0x0
int _tmain(int argc, _TCHAR* argv[])
{
typedef struct _SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION
{
LARGE_INTEGER IdleTime;
LARGE_INTEGER KernelTime;
LARGE_INTEGER UserTime;
LARGE_INTEGER Reserved1[2];
ULONG Reserved2;
} SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION;
typedef struct _SYSTEM_BASIC_INFORMATION {
ULONG Reserved;
ULONG TimerResolution;
ULONG PageSize;
ULONG NumberOfPhysicalPages;
ULONG LowestPhysicalPageNumber;
ULONG HighestPhysicalPageNumber;
ULONG AllocationGranularity;
ULONG_PTR MinimumUserModeAddress;
ULONG_PTR MaximumUserModeAddress;
KAFFINITY ActiveProcessorsAffinityMask;
CCHAR NumberOfProcessors;
} SYSTEM_BASIC_INFORMATION, *PSYSTEM_BASIC_INFORMATION;
// SYSTEM_INFO sysinf;
if (argc<2)
{
printf("Please specify waiting time in seconds\n");
return -1;
}
int nWaitSec = _wtoi(argv[1]);
if (nWaitSec <= 0)
{
printf("Waiting interval in seconds should be positive integer\n");
return -1;
}
typedef DWORD(WINAPI * PNTQUERYSYSYTEMINFORMATION)(DWORD info_class, void *out, DWORD size, DWORD *out_size);
PNTQUERYSYSYTEMINFORMATION pNtQuerySystemInformation = NULL;
pNtQuerySystemInformation = (PNTQUERYSYSYTEMINFORMATION)GetProcAddress(GetModuleHandle(L"NTDLL.DLL"), "NtQuerySystemInformation");
SYSTEM_BASIC_INFORMATION sbi;
SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION * spi;
DWORD returnlength;
DWORD status = pNtQuerySystemInformation(SystemBasicInformation, &sbi,
sizeof(SYSTEM_BASIC_INFORMATION), &returnlength);
spi = new SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION[sbi.NumberOfProcessors];
memset(spi, 0, sizeof(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION)*sbi.NumberOfProcessors);
status = pNtQuerySystemInformation(SystemProcessorPerformanceInformation, spi,
(sizeof(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION)*sbi.NumberOfProcessors), &returnlength);
int numberOfCores = returnlength / sizeof(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION);
printf("Number of cores: %d\n", numberOfCores);
static ULARGE_INTEGER ul_sys_idle_old[32];
static ULARGE_INTEGER ul_sys_kernel_old[32];
static ULARGE_INTEGER ul_sys_user_old[32];
float usage = 0;
float usageAccum = 0;
printf("\n\nWait for %d seconds\n", nWaitSec);
Sleep(nWaitSec*1000);
status = pNtQuerySystemInformation(SystemProcessorPerformanceInformation, spi,
(sizeof(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION)*numberOfCores), &returnlength);
usageAccum = 0;
for (int ii = 0; ii<numberOfCores; ii++)
{
ULARGE_INTEGER ul_sys_idle;
ULARGE_INTEGER ul_sys_kernel;
ULARGE_INTEGER ul_sys_user;
ul_sys_idle.QuadPart = spi[ii].IdleTime.QuadPart;
ul_sys_kernel.QuadPart = spi[ii].KernelTime.QuadPart;
ul_sys_user.QuadPart = spi[ii].UserTime.QuadPart;
ULONGLONG kernelTime = (ul_sys_kernel.QuadPart - ul_sys_kernel_old[ii].QuadPart);
ULONGLONG usertime = (ul_sys_user.QuadPart - ul_sys_user_old[ii].QuadPart);
ULONGLONG idletime = (ul_sys_idle.QuadPart - ul_sys_idle_old[ii].QuadPart);
ULONGLONG proctime = kernelTime + usertime - idletime;
ULONGLONG totaltime = kernelTime + usertime;
usage = (float)(proctime * 100) / totaltime;
usageAccum += usage;
printf("Core : %u: Usage : %f%%\n", ii + 1, usage);
}
usageAccum /= numberOfCores;
printf("----------------\nAverage for the last %d seconds: %f", nWaitSec, usageAccum);
delete[] spi;
return 0;
}
Despite that fact, it seems to be calculated in a wrong way because its output data almost does not change. How could I find CPU usage per core appropriately?

These three arrays are not updated in real time in your source code.So their value is always 0 in the loop.
static ULARGE_INTEGER ul_sys_idle_old[32];
static ULARGE_INTEGER ul_sys_kernel_old[32];
static ULARGE_INTEGER ul_sys_user_old[32];
What you get is the CPU usage time that has been running for a long time, and the base is very large, resulting in basically unchanged.
You can refer to Correct way to get Windows CPU utilization for multiprocessor and update your array to get the correct result.

Related

How can i count percentage CPU for each process by PID

I am trying to code task manager and i stuck with %CPU for each process dy PID.
I wrote something like, that:
static float CalculateCPULoad(unsigned long long idleTicks, unsigned long long totalTicks)
{
static unsigned long long _previousTotalTicks = 0;
static unsigned long long _previousIdleTicks = 0;
unsigned long long totalTicksSinceLastTime = totalTicks - _previousTotalTicks;
unsigned long long idleTicksSinceLastTime = idleTicks - _previousIdleTicks;
float ret = 1.0f - ((totalTicksSinceLastTime > 0) ? ((float)idleTicksSinceLastTime) / totalTicksSinceLastTime : 0);
_previousTotalTicks = totalTicks;
_previousIdleTicks = idleTicks;
return ret;
}
static unsigned long long FileTimeToInt64(const FILETIME& ft) { return (((unsigned long long)(ft.dwHighDateTime)) << 32) | ((unsigned long long)ft.dwLowDateTime); }
And was using it like:
hProcessSnap = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
if (hProcessSnap == INVALID_HANDLE_VALUE)
{
printError("Failed to create Process Snap");
return FALSE;
}
pe.dwSize = sizeof(PROCESSENTRY32);
if (!Process32First(hProcessSnap, &pe))
{
printError("Failed to move along process snap");
CloseHandle(hProcessSnap);
return FALSE;
}
do
{
printf("\n\n=====================================================");
_tprintf(TEXT("\n PROCESS NAME: %s"), pe.szExeFile);
printf("\n-----------------------------------------------------");
dwPriorityClass = 0;
hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pe.th32ProcessID);
if (hProcess == NULL)
{
printError("Failed to open process");
}
else
{
for (int i = 0; i < 2; i++)
{
GetProcessTimes(hProcess, &exist, &exit, &lastKernel, &lastUser);
GetSystemTimes(&lastIdle, 0, 0);
GetCPULoad(lastIdle, lastKernel, lastUser);
Sleep(2500);
}
std::cout << GetCPULoad(lastIdle, lastKernel, lastUser) << "\n";
CloseHandle(hProcess);
}
} while (Process32Next(hProcessSnap, &pe));
CloseHandle(hProcessSnap);
return (TRUE);
}
I know that using sleep() here isnt a good idea,but i havent think up anything better for now.
Pls help me with some code examples,if you can.
Also i want to know am i right that:
CPU% for process= (1- (IdleSystemTimeDelta/TotalProcessTimeDelta))*100%

This is how i get_CPU in percent.
I use hash_map in order to have PID-time connection static to update it.
First time usage get_cpu_usage(int pid) returns zero every time,but with each next usage it will be more and more accurate(I use it with 0.5 sec period).
static int get_processor_number()
{
SYSTEM_INFO info;
GetSystemInfo(&info);
return (int)info.dwNumberOfProcessors;
}
static __int64 file_time_2_utc(const FILETIME* ftime)
{
LARGE_INTEGER li;
li.LowPart = ftime->dwLowDateTime;
li.HighPart = ftime->dwHighDateTime;
return li.QuadPart;
}
static int get_cpu_usage(int pid)
{
static int processor_count_ = -1;
static std::unordered_map<int, __int64> last_time_;
static std::unordered_map<int, __int64> last_system_time_;
FILETIME now;
FILETIME creation_time;
FILETIME exit_time;
FILETIME kernel_time;
FILETIME user_time;
__int64 system_time;
__int64 time;
__int64 system_time_delta;
__int64 time_delta;
int cpu = -1;
if (processor_count_ == -1)
{
processor_count_ = get_processor_number();
}
GetSystemTimeAsFileTime(&now);
HANDLE hProcess = OpenProcess(PROCESS_ALL_ACCESS, false, pid);
if (!GetProcessTimes(hProcess, &creation_time, &exit_time, &kernel_time, &user_time))
{
std::cout << "Unable to getProcessTime\n";
return -1;
}
system_time = (file_time_2_utc(&kernel_time) + file_time_2_utc(&user_time)) / processor_count_;
time = file_time_2_utc(&now);
if ((last_system_time_[pid] == 0) || (last_time_[pid] == 0))
{
last_system_time_[pid] = system_time;
last_time_[pid] = time;
return 0;
}
system_time_delta = system_time - last_system_time_[pid];
time_delta = time - last_time_[pid];
if (time_delta == 0)
{
std::cout << "timedelta=0";
return -1;
}
cpu = int((system_time_delta * 100 + time_delta / 2) / time_delta);
last_system_time_[pid] = system_time;
last_time_[pid] = time;
return cpu;
}

CPU Usage of every process

I found this code on google that calculate of % of usage of current process on win10,but what i'm looking for is a list of % of CPU Usage of every process.
I use GetCurrentProcess() to have the handle of the current process. Is there a way to retrieve the handle of every processes?i'm working on a code that lists running process and calculate usage memory for everyone. Then i need to calculate cpu usage for everyone but i didn't find anything on google.
static ULARGE_INTEGER lastCPU, lastSysCPU, lastUserCPU;
static int numProcessors;
static HANDLE self;
void init(){
SYSTEM_INFO sysInfo;
FILETIME ftime, fsys, fuser;
GetSystemInfo(&sysInfo);
numProcessors = sysInfo.dwNumberOfProcessors;
GetSystemTimeAsFileTime(&ftime);
memcpy(&lastCPU, &ftime, sizeof(FILETIME));
self = GetCurrentProcess();
GetProcessTimes(self, &ftime, &ftime, &fsys, &fuser);
memcpy(&lastSysCPU, &fsys, sizeof(FILETIME));
memcpy(&lastUserCPU, &fuser, sizeof(FILETIME));
}
double getCurrentValue(){
FILETIME ftime, fsys, fuser;
ULARGE_INTEGER now, sys, user;
long double percent;
GetSystemTimeAsFileTime(&ftime);
memcpy(&now, &ftime, sizeof(FILETIME));
GetProcessTimes(GetCurrentProcess(), &ftime, &ftime, &fsys, &fuser);
memcpy(&sys, &fsys, sizeof(FILETIME));
memcpy(&user, &fuser, sizeof(FILETIME));
percent = (sys.QuadPart - lastSysCPU.QuadPart) +
(user.QuadPart - lastUserCPU.QuadPart);
percent /= (now.QuadPart - lastCPU.QuadPart);
percent /= numProcessors;
lastCPU = now;
lastUserCPU = user;
lastSysCPU = sys;
return percent * 100;
}
I'm able to have the list of all running processes but i'm looking for to
calculate cpu usage for every process.
Suggestions?

ok, for effective coding this task need use NtQueryInformationProcess with SystemProcessInformation info class. we got here array of SYSTEM_PROCESS_INFORMATION. here we already have:
LARGE_INTEGER CreateTime;
LARGE_INTEGER UserTime;
LARGE_INTEGER KernelTime;
when in PROCESSENTRY32 no this members. toolhelp functions simply drop this member. without it we need open every process, call GetProcessTimes, etc. with NtQueryInformationProcess all become much more effective and simply. general idea - we need maintain list of processes, and periodic call NtQueryInformationProcess for add new created processes and remove died.
// for debug only 0 <= cpuUsage <= 1000
void PrintCpuUsage(ULONG cpuUsage, PCUNICODE_STRING Name)
{
ULONG p = cpuUsage / 10;
DbgPrint("%02u.%u %wZ\n", p, cpuUsage - p * 10, Name);
}
struct PROCESS_ENTRY : LIST_ENTRY, UNICODE_STRING
{
LARGE_INTEGER _CreateTime, _RunTime;
union {
LARGE_INTEGER _Delta;
ULONG _cpuUsage;
};
HANDLE _UniqueProcessId;
HANDLE _InheritedFromUniqueProcessId;
BOOLEAN _bEnumerated;
PROCESS_ENTRY()
{
RtlInitUnicodeString(this, 0);
InitializeListHead(this);
_RunTime.QuadPart = 0;
_UniqueProcessId = 0;
}
~PROCESS_ENTRY()
{
DbgPrint("--%08x(%08x) %wZ\n", _UniqueProcessId, _InheritedFromUniqueProcessId, static_cast<UNICODE_STRING*>(this));
RtlFreeUnicodeString(this);
RemoveEntryList(this);
}
NTSTATUS Init(PSYSTEM_PROCESS_INFORMATION pspi)
{
_UniqueProcessId = pspi->UniqueProcessId;
_InheritedFromUniqueProcessId = pspi->InheritedFromUniqueProcessId;
_CreateTime = pspi->CreateTime;
DbgPrint("++%08x(%08x) %wZ\n", _UniqueProcessId, _InheritedFromUniqueProcessId, &pspi->ImageName);
return RtlDuplicateUnicodeString(RTL_DUPLICATE_UNICODE_STRING_NULL_TERMINATE, &pspi->ImageName, this);
}
LONGLONG UpdateProcess(PSYSTEM_PROCESS_INFORMATION pspi)
{
_bEnumerated = TRUE;
pspi->KernelTime.QuadPart += pspi->UserTime.QuadPart;
_Delta.QuadPart = pspi->KernelTime.QuadPart - _RunTime.QuadPart;
_RunTime.QuadPart = pspi->KernelTime.QuadPart;
return _Delta.QuadPart;
}
void CalcCpuUsage(LONGLONG QuadPart)
{
_bEnumerated = FALSE;
_cpuUsage = (ULONG)((_Delta.QuadPart * 1000) / QuadPart );
if (_cpuUsage && _UniqueProcessId)
{
PrintCpuUsage(_cpuUsage, this);
}
}
};
struct PROCES_LIST : public LIST_ENTRY
{
LIST_ENTRY _ListHead;
PROCESS_ENTRY IdleProcess;
BOOL _bValid;
PROCES_LIST()
{
InitializeListHead(&_ListHead);
_bValid = FALSE;
}
LONGLONG UpdateOrAddNewProcess(PSYSTEM_PROCESS_INFORMATION pspi);
void RemoveDiedEntries(LONGLONG QuadPart);
void EnumPro();
~PROCES_LIST()
{
RemoveDiedEntries(0);
}
};
LONGLONG PROCES_LIST::UpdateOrAddNewProcess(PSYSTEM_PROCESS_INFORMATION pspi)
{
PROCESS_ENTRY* pe;
PLIST_ENTRY head = &_ListHead, entry = head;
HANDLE UniqueProcessId = pspi->UniqueProcessId;
while ((entry = entry->Flink) != head)
{
pe = static_cast<PROCESS_ENTRY*>(entry);
if (pe->_UniqueProcessId == UniqueProcessId && pe->_CreateTime.QuadPart == pspi->CreateTime.QuadPart)
{
return pe->UpdateProcess(pspi);
}
}
if (pe = new PROCESS_ENTRY)
{
if (0 <= pe->Init(pspi))
{
InsertTailList(head, pe);
return pe->UpdateProcess(pspi);
}
delete pe;
}
return 0;
}
void PROCES_LIST::RemoveDiedEntries(LONGLONG QuadPart)
{
PLIST_ENTRY head = &_ListHead, entry = head->Flink;
while (entry != head)
{
PROCESS_ENTRY* pe = static_cast<PROCESS_ENTRY*>(entry);
entry = entry->Flink;
if (pe->_bEnumerated)
{
pe->CalcCpuUsage(QuadPart);
}
else
{
delete pe;
}
}
}
void PROCES_LIST::EnumPro()
{
ULONG cb = 0, rcb = 0x10000;
PVOID stack = alloca(guz);// volatile UCHAR guz;
union {
PVOID buf;
PBYTE pb;
PSYSTEM_PROCESS_INFORMATION pspi;
};
NTSTATUS status;
do
{
if (cb < rcb)
{
cb = RtlPointerToOffset(buf = alloca(rcb - cb), stack);
}
if (0 <= (status = NtQuerySystemInformation(SystemProcessInformation, buf, cb, &rcb)))
{
LONGLONG QuadPart = 0;
ULONG NextEntryOffset = 0;
do
{
pb += NextEntryOffset;
if (pspi->UniqueProcessId)
{
QuadPart += UpdateOrAddNewProcess(pspi);
}
else
{
QuadPart += IdleProcess.UpdateProcess(pspi);
}
} while (NextEntryOffset = pspi->NextEntryOffset);
RemoveDiedEntries(QuadPart);
IdleProcess.CalcCpuUsage(QuadPart);
if (_bValid)
{
static UNICODE_STRING empty;
PrintCpuUsage(1000 - IdleProcess._cpuUsage, &empty);
}
else
{
_bValid = TRUE;
}
}
} while (status == STATUS_INFO_LENGTH_MISMATCH);
}

InterlockedIncrement vs InterlockedIncrementAcquire / Release (Redux)

This is a followup to this question:
I guess I don't understand the Interlocked Acquire / Release APIs. I put together the small program below. As I understand it, g_val_1, g_val_2 and g_val_3 should always be updated in the same order and should end up as all the same value. But they do not (for more than one thread).
What am I missing? Thanks.
#include "windows.h"
#include "stdio.h"
#define _THREADS_ 100
#define _TICKS_ 1000
int volatile g_threads = 0;
DWORD volatile g_val_1 = 0;
DWORD volatile g_val_2 = 0;
DWORD volatile g_val_3 = 0;
BOOL g_running = TRUE;
DWORD TestThread(PVOID ignore)
{
while (g_running)
{
InterlockedIncrementAcquire(&g_val_1);
g_val_2++;
InterlockedIncrementRelease(&g_val_3);
}
InterlockedDecrement(&g_threads);
return(0);
}
int __cdecl main(int argc, char* argv[])
{
int th, duration;
int success;
DWORD ticks;
duration = _TICKS_;
g_threads = _THREADS_;
printf("Max=%d Threads=%d Entries=%d\n", duration, g_threads);
printf("Creating Threads\n");
th = g_threads;
while (th-- > 0)
{
CreateThread(NULL,
0,
TestThread,
NULL,
NORMAL_PRIORITY_CLASS,
NULL);
}
printf("Starting Threads\n");
ticks = GetTickCount();
while ((GetTickCount() - ticks) < duration);
g_running = FALSE;
while (g_threads > 0);
ticks = GetTickCount() - ticks;
success = ((g_val_1 == g_val_2) && (g_val_1 == g_val_2));
printf("Duration=%d g_val_1=%d g_val_2=%d g_val_3=%d OK=%d\n", ticks, g_val_1, g_val_2, g_val_3, success);
}

Windows 8(.1) Sleep 1 more ms than needed

Sleep on Windows 8.1 x64 always lasts 1 more milliseconds than needed. For instance Sleep(1) lasts approximately 2 milliseconds, Sleep(2) - 3 etc. timeBeginPeriod is set to 1. On Windows 7 works fine as expected (without excess millisecond). Is this behaviour is normal / possible to fix?
#include <Windows.h>
#include <stdio.h>
#pragma comment(lib, "winmm.lib")
LARGE_INTEGER Frequency;
long long int GetCurrent()
{
LARGE_INTEGER counter;
QueryPerformanceCounter(&counter);
return (1000000 * counter.QuadPart / Frequency.QuadPart);
}
int CALLBACK WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow)
{
timeBeginPeriod(1);
QueryPerformanceFrequency(&Frequency);
const unsigned int count = 1000;
long long int buffer[count];
long long int lastTime = GetCurrent(), currentTime;
for (unsigned int i = 0; i < count; i++)
{
currentTime = GetCurrent();
buffer[i] = currentTime - lastTime;
lastTime = currentTime;
Sleep(1);
}
timeEndPeriod(1);
FILE *file = fopen("log.txt", "w");
for (unsigned int i = 0; i < count; i++)
fprintf(file, "%ld\n", buffer[i]);
fclose(file);
return EXIT_SUCCESS;
}

NtDelayExecution workaround thanks to Mehrdad.
static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval) = (NTSTATUS (__stdcall*)(BOOL, PLARGE_INTEGER)) GetProcAddress(GetModuleHandle(L"ntdll.dll"), "NtDelayExecution");
LARGE_INTEGER delay;
unsigned int milliseconds = 1;
delay.QuadPart = (milliseconds > 1) ? -10000LL * (milliseconds - 1) : -1LL;
NtDelayExecution(false, &delay);

Porting clock_gettime to windows

I have the following code running on qnx momemntics.
#define BILLION 1000000000L;
struct timespec start_time;
struct timespec stop_time;
void start MyTestFunc() {
//Initialize the Test Start time
clock_gettime(CLOCK_REALTIME,&start_time)
// ... additonal code.
cout << "The exectuion time of func "<< calculateExecutionTime();
}
double calculateExecutionTime ()
{
clock_gettime(CLOCK_REALTIME,&stop_time);
double dSeconds = (stop_time.tv_sec - start_time.tv_sec);
double dNanoSeconds = (double)( stop_time.tv_nsec - start_time.tv_nsec ) / BILLION;
return dSeconds + dNanoSeconds;
}
Now i want to port above code to windows. can any one provide sample code.
Thanks!

You can implement a clock_gettime() replacement for windows as follows:
LARGE_INTEGER
getFILETIMEoffset()
{
SYSTEMTIME s;
FILETIME f;
LARGE_INTEGER t;
s.wYear = 1970;
s.wMonth = 1;
s.wDay = 1;
s.wHour = 0;
s.wMinute = 0;
s.wSecond = 0;
s.wMilliseconds = 0;
SystemTimeToFileTime(&s, &f);
t.QuadPart = f.dwHighDateTime;
t.QuadPart <<= 32;
t.QuadPart |= f.dwLowDateTime;
return (t);
}
int
clock_gettime(int X, struct timeval *tv)
{
LARGE_INTEGER t;
FILETIME f;
double microseconds;
static LARGE_INTEGER offset;
static double frequencyToMicroseconds;
static int initialized = 0;
static BOOL usePerformanceCounter = 0;
if (!initialized) {
LARGE_INTEGER performanceFrequency;
initialized = 1;
usePerformanceCounter = QueryPerformanceFrequency(&performanceFrequency);
if (usePerformanceCounter) {
QueryPerformanceCounter(&offset);
frequencyToMicroseconds = (double)performanceFrequency.QuadPart / 1000000.;
} else {
offset = getFILETIMEoffset();
frequencyToMicroseconds = 10.;
}
}
if (usePerformanceCounter) QueryPerformanceCounter(&t);
else {
GetSystemTimeAsFileTime(&f);
t.QuadPart = f.dwHighDateTime;
t.QuadPart <<= 32;
t.QuadPart |= f.dwLowDateTime;
}
t.QuadPart -= offset.QuadPart;
microseconds = (double)t.QuadPart / frequencyToMicroseconds;
t.QuadPart = microseconds;
tv->tv_sec = t.QuadPart / 1000000;
tv->tv_usec = t.QuadPart % 1000000;
return (0);
}

Avoiding PerformanceCounter mess, simple code:
struct timespec { long tv_sec; long tv_nsec; }; //header part
int clock_gettime(int, struct timespec *spec) //C-file part
{ __int64 wintime; GetSystemTimeAsFileTime((FILETIME*)&wintime);
wintime -=116444736000000000i64; //1jan1601 to 1jan1970
spec->tv_sec =wintime / 10000000i64; //seconds
spec->tv_nsec =wintime % 10000000i64 *100; //nano-seconds
return 0;
}
...is fast, reliable and correct porting solution with impressive 100ns precision (1ms/10000).
And QPC-based solution which precision will be possibly (on some hw) even better is:
struct timespec { long tv_sec; long tv_nsec; }; //header part
#define exp7 10000000i64 //1E+7 //C-file part
#define exp9 1000000000i64 //1E+9
#define w2ux 116444736000000000i64 //1.jan1601 to 1.jan1970
void unix_time(struct timespec *spec)
{ __int64 wintime; GetSystemTimeAsFileTime((FILETIME*)&wintime);
wintime -=w2ux; spec->tv_sec =wintime / exp7;
spec->tv_nsec =wintime % exp7 *100;
}
int clock_gettime(int, timespec *spec)
{ static struct timespec startspec; static double ticks2nano;
static __int64 startticks, tps =0; __int64 tmp, curticks;
QueryPerformanceFrequency((LARGE_INTEGER*)&tmp); //some strange system can
if (tps !=tmp) { tps =tmp; //init ~~ONCE //possibly change freq ?
QueryPerformanceCounter((LARGE_INTEGER*)&startticks);
unix_time(&startspec); ticks2nano =(double)exp9 / tps; }
QueryPerformanceCounter((LARGE_INTEGER*)&curticks); curticks -=startticks;
spec->tv_sec =startspec.tv_sec + (curticks / tps);
spec->tv_nsec =startspec.tv_nsec + (double)(curticks % tps) * ticks2nano;
if (!(spec->tv_nsec < exp9)) { spec->tv_sec++; spec->tv_nsec -=exp9; }
return 0;
}

My improved version of clock_gettime() using QueryPerformanceCounter().
#define BILLION (1E9)
static BOOL g_first_time = 1;
static LARGE_INTEGER g_counts_per_sec;
int clock_gettime(int dummy, struct timespec *ct)
{
LARGE_INTEGER count;
if (g_first_time)
{
g_first_time = 0;
if (0 == QueryPerformanceFrequency(&g_counts_per_sec))
{
g_counts_per_sec.QuadPart = 0;
}
}
if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) ||
(0 == QueryPerformanceCounter(&count)))
{
return -1;
}
ct->tv_sec = count.QuadPart / g_counts_per_sec.QuadPart;
ct->tv_nsec = ((count.QuadPart % g_counts_per_sec.QuadPart) * BILLION) / g_counts_per_sec.QuadPart;
return 0;
}
I think my version is an improvement over the currently accepted answer using QueryPerformanceCounter(), because -
More robust - checks return values of functions, also value returned in pass-by-reference variable.
More robust - checks validity of input parameter.
More streamlined - Uses as few as necessary number of variables (3 vs 7).
More streamlined - Avoids the code-path involving GetSystemTimeAsFileTime() since QueryPerformanceFrequency() and QueryPerformanceCounter() are guaranteed to work on systems that run Windows XP or later.

A full-featured and fully-tested implementation of clock_gettime() has been in mingw-w64 for many years now. You'll have to use a toolchain with mingw64/msys2 to use this, with header #include <time.h> (on windows). If you're writing a codebase that's portable between linux and windows, and you can't find clock_gettime() in <time.h> for your linux builds 3, I'd suggest you try #include <pthread_time.h>, compiling with -pthread, or linking with -lrt.
See also question 60020968 for Windows builds; and 33846055, 538609 for your Linux builds.

I needed monotonic and realtime.
For monotonic, I just take the perf counter since a wall clock baseline is meaningless.
#define MS_PER_SEC 1000ULL // MS = milliseconds
#define US_PER_MS 1000ULL // US = microseconds
#define HNS_PER_US 10ULL // HNS = hundred-nanoseconds (e.g., 1 hns = 100 ns)
#define NS_PER_US 1000ULL
#define HNS_PER_SEC (MS_PER_SEC * US_PER_MS * HNS_PER_US)
#define NS_PER_HNS (100ULL) // NS = nanoseconds
#define NS_PER_SEC (MS_PER_SEC * US_PER_MS * NS_PER_US)
int clock_gettime_monotonic(struct timespec *tv)
{
static LARGE_INTEGER ticksPerSec;
LARGE_INTEGER ticks;
if (!ticksPerSec.QuadPart) {
QueryPerformanceFrequency(&ticksPerSec);
if (!ticksPerSec.QuadPart) {
errno = ENOTSUP;
return -1;
}
}
QueryPerformanceCounter(&ticks);
tv->tv_sec = (long)(ticks.QuadPart / ticksPerSec.QuadPart);
tv->tv_nsec = (long)(((ticks.QuadPart % ticksPerSec.QuadPart) * NS_PER_SEC) / ticksPerSec.QuadPart);
return 0;
}
and wall clock, based on GMT unlike the tempting and similar _ftime() function.
int clock_gettime_realtime(struct timespec *tv)
{
FILETIME ft;
ULARGE_INTEGER hnsTime;
GetSystemTimePreciseAsFileTime(&ft);
hnsTime.LowPart = ft.dwLowDateTime;
hnsTime.HighPart = ft.dwHighDateTime;
// To get POSIX Epoch as baseline, subtract the number of hns intervals from Jan 1, 1601 to Jan 1, 1970.
hnsTime.QuadPart -= (11644473600ULL * HNS_PER_SEC);
// modulus by hns intervals per second first, then convert to ns, as not to lose resolution
tv->tv_nsec = (long) ((hnsTime.QuadPart % HNS_PER_SEC) * NS_PER_HNS);
tv->tv_sec = (long) (hnsTime.QuadPart / HNS_PER_SEC);
return 0;
}
And then the POSIX compatible function... see POSIX header for typedef and macros.
int clock_gettime(clockid_t type, struct timespec *tp)
{
if (type == CLOCK_MONOTONIC)
{
return clock_gettime_monotonic(tp);
}
else if (type == CLOCK_REALTIME)
{
return clock_gettime_realtime(tp);
}
errno = ENOTSUP;
return -1;
}

You can use timespec_get to implement simple clock_gettime.
(timespec_get function is available since C11)
int clock_gettime(int, struct timespec *tv)
{
return timespec_get(tv, TIME_UTC);
}
... but result timespec has about 10 milisec resolution on my windows7 64bit machine. :(
Here is my version of clock_gettime.
int clock_gettime(int, struct timespec *tv)
{
static int initialized = 0;
static LARGE_INTEGER freq, startCount;
static struct timespec tv_start;
LARGE_INTEGER curCount;
time_t sec_part;
long nsec_part;
if (!initialized) {
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&startCount);
timespec_get(&tv_start, TIME_UTC);
initialized = 1;
}
QueryPerformanceCounter(&curCount);
curCount.QuadPart -= startCount.QuadPart;
sec_part = curCount.QuadPart / freq.QuadPart;
nsec_part = (long)((curCount.QuadPart - (sec_part * freq.QuadPart))
* 1000000000UL / freq.QuadPart);
tv->tv_sec = tv_start.tv_sec + sec_part;
tv->tv_nsec = tv_start.tv_nsec + nsec_part;
if(tv->tv_nsec >= 1000000000UL) {
tv->tv_sec += 1;
tv->tv_nsec -= 1000000000UL;
}
return 0;
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Calculating CPU usage per core by WinAPI - c++

Related

How can i count percentage CPU for each process by PID

CPU Usage of every process

InterlockedIncrement vs InterlockedIncrementAcquire / Release (Redux)

Windows 8(.1) Sleep 1 more ms than needed

Porting clock_gettime to windows

Categories

Resources