Estimate the memory needed for pmr pool resource - c++

I am learning about std::pmr::unsynchronized_pool_resource and tried it in a simple example.
In my example, I use a buffer l_Buffer with an arbitrarily chosen size.
Is it possible to estimate the size needed (k_BufferSize) at build time?
The code snippet:
#include <iostream>
#include <array>
#include <memory_resource>
#include <string>
#include <cassert>
#include <list>
#include <cstdio>   // printf
#include <cstdint>  // uint8_t

class NoisyAllocator : public std::pmr::memory_resource {
public:
    NoisyAllocator(std::string name, std::pmr::memory_resource* upstream) :
        m_name {name}, m_upstream {upstream}
    {
        assert(upstream);
    }
private:
    std::string m_name;
    std::pmr::memory_resource* m_upstream;

    void* do_allocate(std::size_t bytes, std::size_t alignment) override {
        printf("[%s (alloc)] Size: %zu Alignment: %zu ...\n", m_name.c_str(), bytes, alignment);
        auto result = m_upstream->allocate(bytes, alignment);
        printf("[%s (alloc)] ... Address: %p\n", m_name.c_str(), result);
        return result;
    }
    void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override {
        printf("[%s (dealloc)] Address: %p Dealloc Size: %zu Alignment: %zu\n", m_name.c_str(), p, bytes, alignment);
        m_upstream->deallocate(p, bytes, alignment);
    }
    bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
        return this == &other;
    }
};

int main(int, char**) {
    static constexpr std::size_t k_BufferSize {10000};
    static constexpr std::size_t k_NbElems {10};
    static constexpr std::size_t k_ValueSize {1};

    std::cout << "---- Default allocator ----" << std::endl;
    NoisyAllocator l_DefaultAlloc {"Default allocator", std::pmr::null_memory_resource()};
    std::pmr::set_default_resource(&l_DefaultAlloc);

    std::cout << "---- Buffer resource ----" << std::endl;
    NoisyAllocator l_OomAlloc {"Out of memory allocator", std::pmr::null_memory_resource()};
    std::array<uint8_t, k_BufferSize> l_Buffer {};
    std::pmr::monotonic_buffer_resource l_BufferResource(l_Buffer.data(), l_Buffer.size(), &l_OomAlloc);

    std::cout << "---- Monotonic allocator ----" << std::endl;
    NoisyAllocator l_MonotonicAlloc {"Monotonic allocator", &l_BufferResource};

    std::cout << "---- Pool allocator ----" << std::endl;
    std::pmr::unsynchronized_pool_resource l_UnsyncPool(&l_MonotonicAlloc);
    NoisyAllocator l_PoolAlloc {"Pool allocator", &l_UnsyncPool};

    std::cout << "---- Create list ----" << std::endl;
    using value_type_t = std::array<uint8_t, k_ValueSize>;
    std::pmr::list<value_type_t> l_Lst {&l_PoolAlloc};
    for (std::size_t i {0}; i < k_NbElems; i++)
    {
        std::cout << "---- Emplace element n°" << i << " ----" << std::endl;
        l_Lst.emplace_back();
    }

    std::cout << "---- End ----" << std::endl;
    return 0;
}
Output:
---- Default allocator ----
---- Buffer resource ----
---- Monotonic allocator ----
---- Pool allocator ----
[Monotonic allocator (alloc)] Size: 528 Alignment: 8 ...
[Monotonic allocator (alloc)] ... Address: 0x7fffdcf0ef90
---- Create list ----
---- Emplace element n°0 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Monotonic allocator (alloc)] Size: 992 Alignment: 32 ...
[Monotonic allocator (alloc)] ... Address: 0x7fffdcf0f1a0
[Monotonic allocator (alloc)] Size: 192 Alignment: 8 ...
[Monotonic allocator (alloc)] ... Address: 0x7fffdcf0f580
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f1a0
---- Emplace element n°1 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f1b8
---- Emplace element n°2 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f1d0
---- Emplace element n°3 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f1e8
---- Emplace element n°4 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f200
---- Emplace element n°5 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f218
---- Emplace element n°6 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f230
---- Emplace element n°7 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f248
---- Emplace element n°8 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f260
---- Emplace element n°9 ----
[Pool allocator (alloc)] Size: 24 Alignment: 8 ...
[Pool allocator (alloc)] ... Address: 0x7fffdcf0f278
---- End ----
[Pool allocator (dealloc)] Address: 0x7fffdcf0f1a0 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f1b8 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f1d0 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f1e8 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f200 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f218 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f230 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f248 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f260 Dealloc Size: 24 Alignment: 8
[Pool allocator (dealloc)] Address: 0x7fffdcf0f278 Dealloc Size: 24 Alignment: 8
[Monotonic allocator (dealloc)] Address: 0x7fffdcf0f1a0 Dealloc Size: 992 Alignment: 32
[Monotonic allocator (dealloc)] Address: 0x7fffdcf0f580 Dealloc Size: 192 Alignment: 8
[Monotonic allocator (dealloc)] Address: 0x7fffdcf0ef90 Dealloc Size: 528 Alignment: 8
Online compiler: https://godbolt.org/z/EhTE1zr4Y
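A note on the numbers above (my reading, not from the thread): the 528-, 992-, and 192-byte upstream requests are internal bookkeeping of the library's pool resource and are not specified by the standard, so there is no portable formula for computing k_BufferSize at build time. What the standard does offer is std::pmr::pool_options, which lets you bound how the pool grows; a minimal sketch reusing the variables from the snippet above:
    std::pmr::pool_options l_Options;
    l_Options.max_blocks_per_chunk = k_NbElems;      // ask for at most 10 blocks per chunk
    l_Options.largest_required_pool_block = 32;      // larger requests bypass the pools
    std::pmr::unsynchronized_pool_resource l_UnsyncPool(l_Options, &l_MonotonicAlloc);
Both values are only hints; an implementation may clamp or round them, so the actual buffer consumption still has to be measured per toolchain.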

Related

Why do I get avc1.000000? (FFmpeg, H.264 Video Encoding, C++)

I have a bunch of bitmaps and need to encode them as H.264 in a fragmented .mp4.
I'm using C++.
What could cause my AVC profile to be set to 0, SPS[] and PPS[] to be empty, and the codec string to be avc1.000000?
Output from the mp4info:
File:
minor version: 200
compatible brand: iso6
compatible brand: mp41
fast start: yes
Movie:
duration: 0 ms
time scale: 1000
fragments: yes
Found 1 Tracks
Track 1:
flags: 3 ENABLED IN-MOVIE
id: 1
type: Video
duration: 0 ms
language: und
media:
sample count: 0
timescale: 90000
duration: 0 (media timescale units)
duration: 0 (ms)
bitrate (computed): 412.672 Kbps
sample count with fragments: 35
duration with fragments: 540000
duration with fragments: 6000 (ms)
display width: 1280.000000
display height: 720.000000
Sample Description 0
Coding: avc1 (H.264)
Width: 1280
Height: 720
Depth: 24
AVC Profile: 0
AVC Profile Compat: 0
AVC Level: 0
AVC NALU Length Size: 0
AVC SPS: []
AVC PPS: []
Codecs String: avc1.000000
I'm using things like
if (stream->codecpar->codec_id == AVCodecID.AV_CODEC_ID_H264)
{
err = ffmpeg.av_opt_set(cctx->priv_data, "preset", "ultrafast", 0);
err = ffmpeg.av_opt_set(cctx->priv_data, "tune", "zerolatency", 0);
err = ffmpeg.av_opt_set(cctx->priv_data, "profile", "high", 0);
}
...
AVDictionary* opts = null;
ffmpeg.av_dict_set(&opts, "movflags", "default_base_moof+frag_keyframe+empty_moov", 0);
...
AVPacket* pPacket = ffmpeg.av_packet_alloc();
try
{
int error;
do
{
ffmpeg.avcodec_send_frame(cctx, &convertedFrame).ThrowExceptionIfError();
error = ffmpeg.avcodec_receive_packet(cctx, pPacket);
} while (error == ffmpeg.AVERROR(ffmpeg.EAGAIN));
error.ThrowExceptionIfError();
}
finally
{
ffmpeg.av_packet_rescale_ts(pPacket, cctx->time_base, stream->time_base);
pPacket->stream_index = stream->index;
ffmpeg.av_interleaved_write_frame(ofctx, pPacket);
ffmpeg.av_packet_unref(pPacket);
}
What am I missing? I'm using examples from the internet. I thought that if an AVFrame is encoded (send_frame) as H.264 with the profile and preset set, and received as an AVPacket, this would be handled automatically.
This is my first post, please be nice. Thanks in advance for helping.
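For reference, one general FFmpeg point worth checking (a sketch in C++ against the C API, assuming cctx, stream, ofctx and codec correspond to the variables in the snippet above; the question uses the same API through a C# binding): MP4-style muxers build the avcC box (profile, SPS, PPS) from the codec context's extradata, and libx264 only fills extradata when AV_CODEC_FLAG_GLOBAL_HEADER is set before avcodec_open2; the parameters then have to be copied into the stream:
    // Request global SPS/PPS headers when the container wants them (fragmented MP4 does).
    if (ofctx->oformat->flags & AVFMT_GLOBALHEADER)
        cctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    // Open the encoder, then mirror its parameters (including extradata) into the stream.
    avcodec_open2(cctx, codec, nullptr);
    avcodec_parameters_from_context(stream->codecpar, cctx);
If extradata stays empty, output like the zeros shown above would be the expected result.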

"setrlimit()" is not affecting the running process

I am trying to simulate the error scenario in a C++ Linux application where the heap does not have enough memory for an allocation.
But even though I use setrlimit to reduce the heap memory available to the process, the heap allocation still succeeds.
struct rlimit the_limit = { 1, 1 };
if (-1 == setrlimit(RLIMIT_DATA, &the_limit)) {
    perror("setrlimit failed");
}
try
{
    char *n = new char[5600];
    if (n==NULL)
    {
        cout <<"\nAllocation Failure\n";
    }
}
catch (std::bad_alloc& ba)
{
    std::cerr << "bad_alloc caught: " << ba.what() << '\n';
}
Most C++ standard libraries, including the one supplied with g++, start off with some heap memory preallocated.
5600 is a small request and, as such, on my Linux system it gets satisfied from that preallocated memory, as evidenced by an strace.
Modified example:
#include <stdio.h>
#include <sys/resource.h>

int main()
{
    struct rlimit the_limit = { 1, 1 };
    if (-1 == setrlimit(RLIMIT_DATA, &the_limit)) { perror("setrlimit failed"); }
    puts("ALLOC");
#if __cplusplus
    try { char *n = new char[5600]; } catch (...) { perror("alloc failure"); }
#else
    { char *n = malloc(1); if(!n) perror("alloc failure"); }
#endif
}
End of example's strace:
...
write(1, "ALLOC\n", 6ALLOC
) = 6
exit_group(0) = ?
Either increasing the request size, e.g. in my case to at least 1<<16, or switching to plain C, causes the allocation request to be served from the OS, and then the limit does apply:
End of strace with an 1<<16 allocation request:
write(1, "ALLOC\n", 6ALLOC
) = 6
brk(0x561bcc5d4000) = 0x561bcc5b2000
mmap(NULL, 1048576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
dup(2) = 3
fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
fstat(3, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 14), ...}) = 0
write(3, "alloc failure: Cannot allocate m"..., 38alloc failure: Cannot allocate memory
) = 38
close(3) = 0
exit_group(0) = ?
Note that generic allocator implementations generally use sbrk and/or mmap to get memory directly from the OS, and, as you can glean from the setrlimit man page, RLIMIT_DATA only applies to mmap-backed allocations if you're on Linux >= 4.7.
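As an additional illustration (my sketch, not part of the original answer): if you want the limit to bite regardless of whether the allocator goes through brk or mmap, RLIMIT_AS caps the whole address space and is checked on both paths. A variation of the modified example:
    #include <stdio.h>
    #include <sys/resource.h>
    #include <new>
    int main()
    {
        // Cap the entire address space at 1 GiB; both brk and mmap count against it.
        struct rlimit the_limit = { 1ul << 30, 1ul << 30 };
        if (-1 == setrlimit(RLIMIT_AS, &the_limit)) { perror("setrlimit failed"); }
        puts("ALLOC");
        try { char *n = new char[1ul << 31]; (void)n; }   // 2 GiB request, exceeds the cap
        catch (const std::bad_alloc&) { puts("bad_alloc caught"); }
    }
The exact threshold depends on how much address space the process already has mapped at startup.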

C++ application fails to allocate more hugepages than a certain limit

Overview
I have a C++ application that reads a large amount of data (~1 TB). I run it using hugepages (614400 pages at 2 MB each) and this works - until it hits 128 GB.
For testing, I created a simple C++ application that allocates chunks of 2 MB until it can't.
Application is run using:
LD_PRELOAD=/usr/lib64/libhugetlbfs.so HUGETLB_MORECORE=yes ./a.out
While it runs, I monitor the number of free hugepages (from /proc/meminfo).
I can see that it consumes hugepages at the expected rate.
However, the application crashes with a std::bad_alloc exception at 128 GB allocated (65536 pages).
If I run two or more instances at the same time, they all crash at 128 GB each.
If I decrease the cgroup limit to something small, say 16 GB, the app crashes correctly at that point with a 'bus error'.
Am I missing something trivial? Please look below for details.
I'm running out of ideas...
Details
Machine, OS and software:
CPU : Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz
Memory : 1.5T
Kernel : 3.10.0-693.5.2.el7.x86_64 #1 SMP Fri Oct 20 20:32:50 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
OS : CentOS Linux release 7.4.1708 (Core)
hugetlbfs : 2.16-12.el7
gcc : 7.2.1 20170829
Simple test code I used (allocates chunks of 2 MB until the number of free hugepages drops below a limit):
#include <iostream>
#include <fstream>
#include <vector>
#include <array>
#include <string>
#define MEM512K 512*1024ul
#define MEM2M 4*MEM512K
// data block
template <size_t N>
struct DataBlock {
char data[N];
};
// Hugepage info
struct HugePageInfo {
size_t memfree;
size_t total;
size_t free;
size_t size;
size_t used;
double used_size;
};
// dump hugepage info
void dumpHPI(const HugePageInfo & hpi) {
std::cout << "HugePages total : " << hpi.total << std::endl;
std::cout << "HugePages free : " << hpi.free << std::endl;
std::cout << "HugePages size : " << hpi.size << std::endl;
}
// dump hugepage info in one line
void dumpHPIline(const size_t i, const HugePageInfo & hpi) {
std::cout << i << " "
<< hpi.memfree << " "
<< hpi.total-hpi.free << " "
<< hpi.free << " "
<< hpi.used_size
<< std::endl;
}
// get hugepage info from /proc/meminfo
void getHugePageInfo( HugePageInfo & hpi ) {
std::ifstream fmeminfo;
fmeminfo.open("/proc/meminfo",std::ifstream::in);
std::string line;
size_t n=0;
while (fmeminfo.good()) {
std::getline(fmeminfo,line);
const size_t sep = line.find_first_of(':');
if (sep==std::string::npos) continue;
const std::string lblstr = line.substr(0,sep);
const size_t endpos = line.find(" kB");
const std::string trmstr = line.substr(sep+1,(endpos==std::string::npos ? line.size() : endpos-sep-1));
const size_t startpos = trmstr.find_first_not_of(' ');
const std::string valstr = (startpos==std::string::npos ? trmstr : trmstr.substr(startpos) );
if (lblstr=="HugePages_Total") {
hpi.total = std::stoi(valstr);
} else if (lblstr=="HugePages_Free") {
hpi.free = std::stoi(valstr);
} else if (lblstr=="Hugepagesize") {
hpi.size = std::stoi(valstr);
} else if (lblstr=="MemFree") {
hpi.memfree = std::stoi(valstr);
}
}
hpi.used = hpi.total - hpi.free;
hpi.used_size = double(hpi.used*hpi.size)/1024.0/1024.0;
}
// allocate data
void test_rnd_data() {
typedef DataBlock<MEM2M> elem_t;
HugePageInfo hpi;
getHugePageInfo(hpi);
dumpHPIline(0,hpi);
std::array<elem_t *,MEM512K> memmap;
for (size_t i=0; i<memmap.size(); i++) memmap[i]=nullptr;
for (size_t i=0; i<memmap.size(); i++) {
// allocate a new 2M block
memmap[i] = new elem_t();
// output progress
if (i%1000==0) {
getHugePageInfo(hpi);
dumpHPIline(i,hpi);
if (hpi.free<1000) break;
}
}
std::cout << "Cleaning up...." << std::endl;
for (size_t i=0; i<memmap.size(); i++) {
if (memmap[i]==nullptr) continue;
delete memmap[i];
}
}
int main(int argc, const char** argv) {
test_rnd_data();
}
Hugepages are set up at boot time: 614400 pages at 2 MB each.
From /proc/meminfo:
MemTotal: 1584978368 kB
MemFree: 311062332 kB
MemAvailable: 309934096 kB
Buffers: 3220 kB
Cached: 613396 kB
SwapCached: 0 kB
Active: 556884 kB
Inactive: 281648 kB
Active(anon): 224604 kB
Inactive(anon): 15660 kB
Active(file): 332280 kB
Inactive(file): 265988 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 2097148 kB
SwapFree: 2097148 kB
Dirty: 0 kB
Writeback: 0 kB
AnonPages: 222280 kB
Mapped: 89784 kB
Shmem: 18348 kB
Slab: 482556 kB
SReclaimable: 189720 kB
SUnreclaim: 292836 kB
KernelStack: 11248 kB
PageTables: 14628 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 165440732 kB
Committed_AS: 1636296 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 7789100 kB
VmallocChunk: 33546287092 kB
HardwareCorrupted: 0 kB
AnonHugePages: 0 kB
HugePages_Total: 614400
HugePages_Free: 614400
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
DirectMap4k: 341900 kB
DirectMap2M: 59328512 kB
DirectMap1G: 1552941056 kB
Limits from ulimit:
core file size (blocks, -c) 0
data seg size (kbytes, -d) unlimited
scheduling priority (-e) 0
file size (blocks, -f) unlimited
pending signals (-i) 6191203
max locked memory (kbytes, -l) 1258291200
max memory size (kbytes, -m) unlimited
open files (-n) 1024
pipe size (512 bytes, -p) 8
POSIX message queues (bytes, -q) 819200
real-time priority (-r) 0
stack size (kbytes, -s) 8192
cpu time (seconds, -t) unlimited
max user processes (-u) 4096
virtual memory (kbytes, -v) unlimited
file locks (-x) unlimited
cgroup limit:
> cat /sys/fs/cgroup/hugetlb/hugetlb.2MB.limit_in_bytes
9223372036854771712
Tests
Output when running the test code using HUGETLB_DEBUG=1:
...
libhugetlbfs [abc:185885]: INFO: Attempting to map 2097152 bytes
libhugetlbfs [abc:185885]: INFO: ... = 0x1ffb200000
libhugetlbfs [abc:185885]: INFO: hugetlbfs_morecore(2097152) = ...
libhugetlbfs [abc:185885]: INFO: heapbase = 0xa00000, heaptop = 0x1ffb400000, mapsize = 1ffaa00000, delta=2097152
libhugetlbfs [abc:185885]: INFO: Attempting to map 2097152 bytes
libhugetlbfs [abc:185885]: WARNING: New heap segment map at 0x1ffb400000 failed: Cannot allocate memory
terminate called after throwing an instance of 'std::bad_alloc'
what(): std::bad_alloc
Aborted (core dumped)
Using strace:
...
mmap(0x1ffb400000, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0x1ffa200000) = 0x1ffb400000
mmap(0x1ffb600000, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0x1ffa400000) = 0x1ffb600000
mmap(0x1ffb800000, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0x1ffa600000) = 0x1ffb800000
mmap(0x1ffba00000, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0x1ffa800000) = 0x1ffba00000
mmap(0x1ffbc00000, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0x1ffaa00000) = -1 ENOMEM (Cannot allocate memory)
write(2, "libhugetlbfs", 12) = 12
write(2, ": WARNING: New heap segment map "..., 79) = 79
mmap(NULL, 3149824, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(NULL, 67108864, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(NULL, 67108864, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = -1 ENOMEM (Cannot allocate memory)
mmap(NULL, 2101248, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory)
write(2, "terminate called after throwing "..., 48) = 48
write(2, "std::bad_alloc", 14) = 14
write(2, "'\n", 2) = 2
write(2, " what(): ", 11) = 11
write(2, "std::bad_alloc", 14) = 14
write(2, "\n", 1) = 1
rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
gettid() = 188617
tgkill(188617, 188617, SIGABRT) = 0
--- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=188617, si_uid=1001} ---
Finally in /proc/pid/numa_maps:
...
1ffb000000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N1=1 kernelpagesize_kB=2048
1ffb200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N1=1 kernelpagesize_kB=2048
1ffb400000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N1=1 kernelpagesize_kB=2048
1ffb600000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N1=1 kernelpagesize_kB=2048
1ffb800000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N1=1 kernelpagesize_kB=2048
...
However, the application crashes with a std::bad_alloc exception at 128 GB allocated (65536 pages).
You are allocating too many small segments; there is a limit on the number of memory map segments a process can have:
sysctl -n vm.max_map_count
You are trying to create at least 1024 * 512 * 4 == 2097152 mappings, plus one more for the array, but the default value of vm.max_map_count is only 65536.
You can change it with:
sysctl -w vm.max_map_count=3000000
Or you could allocate a bigger segment in your code.
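A sketch of that last suggestion (mine, not part of the answer): reserve one large hugepage-backed mapping up front and place the 2 MB blocks inside it, so the process stays far below vm.max_map_count:
    #include <sys/mman.h>
    #include <cstddef>
    #include <cstdio>
    int main() {
        // One 1 GiB region backed by hugepages is a single mapping, instead of
        // one mapping per 2 MB block obtained through operator new.
        const std::size_t zSize = std::size_t(1) << 30;
        void* p = mmap(nullptr, zSize, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }
        // ... construct the DataBlock objects inside this region ...
        munmap(p, zSize);
        return 0;
    }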

C++ blocking queue can't poll current data

I have run into an odd situation.
I have an Android app which calls C++ code.
In the C++ code, I have two threads: one puts data into a blocking queue and the other takes data out of it.
The data structure is like:
typedef struct {
    int len;
    char* data;
} myStruct;
Every time, I put a pointer to a myStruct into the queue and later take this pointer out of the queue.
But sometimes I read a very large len from the data, as in the log shown below:
Put Log Length: 128
Take Log Length: 128
Put Log Length: 171
Take Log Length: 171
Put Log Length: 73
Take Log Length: 73
Put Length: 99
Put Length: 72
Put Length: 124
Take Log Length: 72
......
Take Log Length: 2047249896
My blocking queue code is listed below:
#include "BlockingQueue.h"
template<typename T>
void BlockingQueue<T>::put(const T& task)
{
    std::unique_lock<std::mutex> lock(mtx);
    q.push_back( task );
    isEmpty.notify_all();
}

template<typename T>
T BlockingQueue<T>::take()
{
    std::unique_lock<std::mutex> lock(mtx);
    isEmpty.wait(lock, [this]{ return !q.empty(); });
    T front( q.front() );
    q.pop_front();
    return front;
}

template<typename T>
bool BlockingQueue<T>::empty()
{
    std::unique_lock<std::mutex> lock(mtx);
    return q.empty();
}
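The header BlockingQueue.h is not shown; a minimal declaration consistent with the member functions above (my reconstruction, the real header may differ) would look like:
    #include <deque>
    #include <mutex>
    #include <condition_variable>
    template<typename T>
    class BlockingQueue {
    public:
        void put(const T& task);   // push an element and wake a waiting consumer
        T take();                  // block until an element is available, then pop it
        bool empty();
    private:
        std::deque<T> q;                    // underlying FIFO storage
        std::mutex mtx;                     // protects q
        std::condition_variable isEmpty;    // signalled whenever q gains an element
    };
Note that with the definitions kept in a separate .cpp file, the template has to be explicitly instantiated for every T that is used; putting the definitions in the header is the more common arrangement.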

shmget fails with ENOMEM even though enough pages available

We're getting odd behaviour when trying to allocate an approximately 10MB block of memory from huge pages. System is SL6.4 64-bit, recent Intel CPU, 64GB RAM.
Initially we allocated 20 huge pages which should be enough.
$ cat /proc/meminfo | grep Huge
AnonHugePages: 0 kB
HugePages_Total: 20
HugePages_Free: 20
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
Other huge page settings:
/proc/sys/kernel/shmall = 4294967296
/proc/sys/kernel/shmmax = 68719476736
/proc/sys/kernel/shmmni = 4096
/proc/sys/kernel/shm_rmid_forced = 0
shmget fails with ENOMEM. The only explanation I can find for this is in the man page, which states "No memory could be allocated for segment overhead", but I haven't been able to discover what "segment overhead" is.
On another server with the same number of pages configured shmget returns successfully.
On the problem server we increased the number of huge pages to 100. The allocation now succeeds, but it also creates 64 separate 2 MB shared memory segments:
$ ipcs -m
------ Shared Memory Segments --------
key shmid owner perms bytes nattch status
0x0091efab 10223638 rsprod 600 2097152 1
0x0092efab 10256407 rsprod 600 2097152 1
0x0093efab 10289176 rsprod 600 2097152 1
0x0094efab 10321945 rsprod 600 2097152 1
0x0095efab 10354714 rsprod 600 2097152 1
0x0096efab 10387483 rsprod 600 2097152 1
...
0x00cdefab 12189778 rsprod 600 2097152 1
0x00ceefab 12222547 rsprod 600 2097152 1
0x00cfefab 12255316 rsprod 600 2097152 1
0x00d0efab 12288085 rsprod 600 2097152 1
0x00000000 12320854 rsprod 600 10485760 1
The code calling shmget is below. This is only being called once in the application.
uint64_t GetHugePageSize()
{
FILE *meminfo = fopen("/proc/meminfo", "r");
if(meminfo == NULL) {
return 0;
}
char line[256];
while(fgets(line, sizeof(line), meminfo)) {
uint64_t zHugePageSize = 0;
if(sscanf(line, "Hugepagesize: %lu kB", &zHugePageSize) == 1) {
fclose(meminfo);
return zHugePageSize*1024;
}
}
fclose(meminfo);
return 0;
}
char* HugeTableNew(size_t aSize, int& aSharedMemID)
{
static const uint64_t sHugePageSize = GetHugePageSize();
uint64_t zSize = aSize;
// round up to next page size, otherwise shmat fails with EINVAL (22)
const uint64_t HUGE_PAGE_MASK = sHugePageSize-1;
if(aSize & HUGE_PAGE_MASK) {
zSize = (aSize&~HUGE_PAGE_MASK) + sHugePageSize;
}
aSharedMemID = shmget(IPC_PRIVATE, zSize, IPC_CREAT|SHM_HUGETLB|SHM_R|SHM_W);
if(aSharedMemID < 0) {
perror("shmget");
return nullptr;
}
...
Does anyone know:
What causes the allocation to fail when there are enough free huge pages available?
What causes the extra 2MB pages to be allocated?
What "segment overhead" is?