I have a webcam connected to beaglebone via usb. I am coding in c++ and my goal is to capture raw UNCOMPRESSED picture from the webcam.
Firstly i checked what formats are supported via command v4l2-ctl --list-formats and the result was:
Index : 0
Type : Video Capture
Pixel Format: 'MJPG' (compressed)
Name : Motion-JPEG
Index : 1
Type : Video Capture
Pixel Format: 'YUYV'
Name : YUYV 4:2:2
So from this I assume it has to be possible to get an uncompressed picture if i try to use YUYV format.
Knowing this I started writing a program in c++. I successfully written a program to capture a compressed picture, but when trying to capture using format YUYV it doesnt work and i really need some help to get this done.
Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <linux/videodev2.h>
#include <libv4l2.h>
template <typename typeXX>
void clear_memmory(typeXX* x) {
memset(x, 0, sizeof(*x));
}
void xioctl(int cd, int request, void *arg){
int response;
do{
//ensures we get the correct response.
response = v4l2_ioctl(cd, request, arg);
}
while (response == -1 && ((errno == EINTR) || (errno == EAGAIN)));
if (response == -1) {
fprintf(stderr, "error %d, %s\n", errno, strerror(errno));
exit(EXIT_FAILURE);
}
}
struct LMSBBB_buffer{
void* start;
size_t length;
};
int main(){
const char* dev_name = "/dev/video0";
int width=1920;
int height=1080;
int fd = v4l2_open(dev_name, O_RDWR | O_NONBLOCK, 0);
struct v4l2_format format = {0};
format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
format.fmt.pix.width = width;
format.fmt.pix.height = height;
format.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;//V4L2_PIX_FMT_YUYV //V4L2_PIX_FMT_RGB24
format.fmt.pix.field = V4L2_FIELD_NONE; //V4L2_FIELD_NONE
xioctl(fd, VIDIOC_S_FMT, &format);
printf("Device initialized.\n");
///request buffers
struct v4l2_requestbuffers req = {0};
req.count = 2;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
xioctl(fd, VIDIOC_REQBUFS, &req);
printf("Buffers requested.\n");
///mapping buffers
struct v4l2_buffer buf;
LMSBBB_buffer* buffers;
unsigned int i;
buffers = (LMSBBB_buffer*) calloc(req.count, sizeof(*buffers));
for (i = 0; i < req.count; i++) {
clear_memmory(&(buf));
(buf).type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
(buf).memory = V4L2_MEMORY_MMAP;
(buf).index = i;
xioctl(fd, VIDIOC_QUERYBUF, &buf);
buffers[i].length = (buf).length;
printf("A buff has a len of: %i\n",buffers[i].length);
buffers[i].start = v4l2_mmap(NULL, (buf).length, PROT_READ | PROT_WRITE, MAP_SHARED,fd, (buf).m.offset);
if (MAP_FAILED == buffers[i].start) {
perror("Can not map the buffers.");
exit(EXIT_FAILURE);
}
}
printf("Buffers mapped.\n");
for (i = 0; i < req.count; i++) {
clear_memmory(&(buf));
(buf).type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
(buf).memory = V4L2_MEMORY_MMAP;
(buf).index = i;
ioctl(fd,VIDIOC_QBUF, &(buf));
}
enum v4l2_buf_type type;
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
ioctl(fd,VIDIOC_STREAMON, &type);
printf("buffers queued and streaming.\n");
int pic_count=0;
///CAPTURE
fd_set fds;
struct timeval tv;
int r;
char out_name[256];
FILE* fout;
do {
FD_ZERO(&fds);
FD_SET(fd, &fds);
// Timeout.
tv.tv_sec = 2;
tv.tv_usec = 0;
r = select(fd + 1, &fds, NULL, NULL, &tv);
} while ((r == -1 && (errno = EINTR)));
if (r == -1) {
perror("select");
exit(EXIT_FAILURE);
}
clear_memmory(&(buf));
(buf).type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
(buf).memory = V4L2_MEMORY_MMAP;
xioctl(fd,VIDIOC_DQBUF, &(buf));
printf("Buff index: %i\n",(buf).index);
sprintf(out_name, "image%03d.ppm",pic_count);
fout = fopen(out_name, "w");
if (!fout) {
perror("Cannot open image");
exit(EXIT_FAILURE);
}
fprintf(fout, "P6\n%d %d 255\n",width, height);
fwrite(buffers[(buf).index].start, (buf).bytesused, 1, fout);
fclose(fout);
pic_count++;
clear_memmory(&(buf));
(buf).type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
(buf).memory = V4L2_MEMORY_MMAP;
xioctl(fd,VIDIOC_DQBUF, &(buf));
printf("Buff index: %i\n",(buf).index);
sprintf(out_name, "image%03d.ppm",pic_count);
fout = fopen(out_name, "w");
if (!fout) {
perror("Cannot open image");
exit(EXIT_FAILURE);
}
fprintf(fout, "P6\n%d %d 255\n",width, height);
fwrite(buffers[(buf).index].start, (buf).bytesused, 1, fout);
fclose(fout);
pic_count++;
///xioctl(fd,VIDIOC_QBUF, &(buf));
return 0;
}
in line 50, i can choose the format between V4L2_PIX_FMT_YUYV and V4L2_PIX_FMT_RGB24.
for V4L2_PIX_FMT_RGB24 i get the picture, but when using V4L2_PIX_FMT_YUYV I get this error:
libv4l2: error dequeuing buf: Resource temporarily unavailable
libv4l2: error dequeuing buf: Resource temporarily unavailable
libv4l2: error dequeuing buf: Resource temporarily unavailable
libv4l2: error dequeuing buf: Resource temporarily unavailable
libv4l2: error dequeuing buf: Resource temporarily unavailable
the error lines goes for ever until i end the program manually.
Does anyone have an idea what to do? I spent over 2 weeks on this and i can't move anywhere from here. I would really appreciate any advice.
From what I see you are requesting a FullHD (1920x1080) buffer in YUYV format from a camera. You did not mention the camera type/model/specs, but if it is a generic USB-attached hardware most likely you will not get a raw FullHD YUYV buffer as an output, only the MJPEG one (which you can decode to YUV, if you hack around with libjpeg) or the decoded RGB buffer (which is pretty much the decoded MJPEG with YUV->RGB conversion) which is not mmapped.
The exact list of formats with framerates can be requested by this command, which would probably tell you it does not provide a 1920x1080 YUYV, only something smaller, like 640x480:
v4l2-ctl --list-formats
If you need video processing with "true" zero-copy access to raw YUYV camera frames, you need direct access to hardware and that specific hardware in the first place. Once you have the USB interface between your software and the camera, you get an extra indirection and that means the speed goes down. Think for a moment, the YUYV frame at 1920x1080 takes up approximately 4 Megabytes of memory. At 30 FPS this is 120 Megabytes (or 960 Megabits) per second of bus throughput. If you have a USB2.0 camera, there is just no bandwidth to support this (thus the need for MJPEG). Even at 15FPS this is 480 Megabits, not counting the USB latency and protocol overhead.
To provide some "actionable feedback" I would advice to first concentrate on the algorithms (probably, you just don't want to loose the processing speed at the very first step) which you want to apply to the image. Don't hesitate to use OpenCV for camera input and basic image processing, later you can switch to some hardware interface and hand-written algorithms.
The easier way of getting raw frames would be to use Android's camera interface and try to process the incoming frames with GLSL shaders using the GL_TEXTURE_EXTERNAL_OES extension, about which there information and code samples available. There you can connect GL textures to AHardwareBuffer instances and then use AHardwareBuffer_lock function to get raw pointers. The exact supported formats also may vary across the hardware, so do not expect this to be super-easy.
I've recently had a similar issue. In my case the camera driver needed the VIDIOC_S_PARM ioctl in order to set the frame rate and initialize the camera for the selected capture mode.
You can try to add this code after the VIDIOC_S_FMT and see if it works for you as well:
struct v4l2_streamparm streamparam;
memset(&streamparam, 0, sizeof(streamparam));
streamparam.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
xioctl(fd, VIDIOC_G_PARM, &streamparam);
streamparam.parm.capture.timeperframe.numerator = 1;
streamparam.parm.capture.timeperframe.denominator = 5;
xioctl(fd, VIDIOC_S_PARM, &streamparam);
Related
I have been struggling with making a c++ code for capturing a picture from web-camera. I successfully did it, but I would like some clarification about the process i took.
So my code can be described in 6 steps, i will write them here along with my questions:
I. step: Initialize the device and set image format.
const char* dev_name = "/dev/video0";
int width=320;
int height=240;
int fd = v4l2_open(dev_name, O_RDWR | O_NONBLOCK, 0);
struct v4l2_format format = {0};
format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
format.fmt.pix.width = width;
format.fmt.pix.height = height;
format.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;//V4L2_PIX_FMT_YUYV //V4L2_PIX_FMT_RGB24
format.fmt.pix.field = V4L2_FIELD_NONE; //V4L2_FIELD_NONE
xioctl(fd, VIDIOC_S_FMT, &format);
II. step: Request for a buffer.
struct v4l2_requestbuffers req = {0};
req.count = 2;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
xioctl(fd, VIDIOC_REQBUFS, &req);
question: What exactly does this "request a buffer" do?
question: Is this buffer physically located inside the camera or on my pc?
III. step: Query the buffer and mapping.
struct v4l2_buffer buf;
buffer* buffers;
unsigned int i;
buffers = (buffer*) calloc(req.count, sizeof(*buffers));
for (i = 0; i < req.count; i++) {
clear_memmory(&(buf));
(buf).type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
(buf).memory = V4L2_MEMORY_MMAP;
(buf).index = i;
xioctl(fd, VIDIOC_QUERYBUF, &buf);
buffers[i].length = (buf).length;
printf("A buff has a len of: %i\n",buffers[i].length);
buffers[i].start = v4l2_mmap(NULL, (buf).length, PROT_READ | PROT_WRITE, MAP_SHARED,fd, (buf).m.offset);
if (MAP_FAILED == buffers[i].start) {
perror("Can not map the buffers.");
exit(EXIT_FAILURE);
}
}
question: I understand that i do some mapping here, but can somebody explain why is this needed and what exactly is mapped where. To me it sounds like actual buffer is somewhere else and then i mapp it into my own buffer so i can read things from my buffer. Is this correct and where is the actual buffer?
question: Could i also avoid the mapping?
IV. step: Start streaming. Queue the buffer.
for (i = 0; i < 1; i++) {
clear_memmory(&(buf));
(buf).type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
(buf).memory = V4L2_MEMORY_MMAP;
(buf).index = i;
xioctl(fd,VIDIOC_QBUF, &(buf));
}
enum v4l2_buf_type type;
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
xioctl(fd,VIDIOC_STREAMON, &type);
question: What does the VIDIOC_QBUF do? Please be very precise here and using mostly child words in answer to this since this is very confusing.
question: Should VIDIOC_QBUF happen after VIDIOC_STREAMON? Does it matter?
V. step: DeQueue the buffer and save a frame.
do {
FD_ZERO(&fds);
FD_SET(fd, &fds);
// Timeout.
tv.tv_sec = 2;
tv.tv_usec = 0;
r = select(fd + 1, &fds, NULL, NULL, &tv);
} while ((r == -1 && (errno = EINTR)));
if (r == -1) {
perror("select");
exit(EXIT_FAILURE);
}
clear_memmory(&(buf));
(buf).type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
(buf).memory = V4L2_MEMORY_MMAP;
xioctl(fd,VIDIOC_DQBUF, &(buf));
printf("Buff index: %i\n",(buf).index);
sprintf(out_name, "image%03d.ppm",pic_count);
fout = fopen(out_name, "w");
if (!fout) {
perror("Cannot open image");
exit(EXIT_FAILURE);
}
fwrite(buffers[(buf).index].start, (buf).bytesused, 1, fout);
fclose(fout);
pic_count++;
question: Why is VIDIOC_DQBUF needed and what does it do?
question: If i want to capture 3 frames and if i have only 1 buffer for 1 picture, do i need to queue, dequeue 3 times?
question: If i want 1000000 pictures, can I make that buffer big enough to hold them all? What is limiting the size of that buffer? Is that buffer on the camera and that means all this pictures would be sitting in it?
question: If i want 10 pictures taken at moments I chose, and if i go with making a buffer large enough for 10 pictures. What should i call at the moment i want to take a picture? only VIDIOC_QBUF? VIDIOC_QBUF and VIDIOC_DQBUF?, only VIDIOC_DQBUF?
Please don't point me to https://01.org/linuxgraphics/gfx-docs/drm/media/uapi/v4l/vidioc-qbuf.html or other sites, as I have read everything i was able to found and I am still left with above unclarity. I really want detailed explanations about these questions. I thank you in advance for all helpful answers.
I am trying to build a NDK based c++ low latancy audio player which will encounter three operations for multiple audios.
Play from assets.
Stream from an online source.
Play from local device storage.
From one of the Oboe samples provided by Google, I added another function to the class NDKExtractor.cpp to extract a URL based audio and render it to audio device while reading from source at the same time.
int32_t NDKExtractor::decode(char *file, uint8_t *targetData, AudioProperties targetProperties) {
LOGD("Using NDK decoder: %s",file);
// Extract the audio frames
AMediaExtractor *extractor = AMediaExtractor_new();
//using this method instead of AMediaExtractor_setDataSourceFd() as used for asset files in the rythem game example
media_status_t amresult = AMediaExtractor_setDataSource(extractor, file);
if (amresult != AMEDIA_OK) {
LOGE("Error setting extractor data source, err %d", amresult);
return 0;
}
// Specify our desired output format by creating it from our source
AMediaFormat *format = AMediaExtractor_getTrackFormat(extractor, 0);
int32_t sampleRate;
if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_SAMPLE_RATE, &sampleRate)) {
LOGD("Source sample rate %d", sampleRate);
if (sampleRate != targetProperties.sampleRate) {
LOGE("Input (%d) and output (%d) sample rates do not match. "
"NDK decoder does not support resampling.",
sampleRate,
targetProperties.sampleRate);
return 0;
}
} else {
LOGE("Failed to get sample rate");
return 0;
};
int32_t channelCount;
if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &channelCount)) {
LOGD("Got channel count %d", channelCount);
if (channelCount != targetProperties.channelCount) {
LOGE("NDK decoder does not support different "
"input (%d) and output (%d) channel counts",
channelCount,
targetProperties.channelCount);
}
} else {
LOGE("Failed to get channel count");
return 0;
}
const char *formatStr = AMediaFormat_toString(format);
LOGD("Output format %s", formatStr);
const char *mimeType;
if (AMediaFormat_getString(format, AMEDIAFORMAT_KEY_MIME, &mimeType)) {
LOGD("Got mime type %s", mimeType);
} else {
LOGE("Failed to get mime type");
return 0;
}
// Obtain the correct decoder
AMediaCodec *codec = nullptr;
AMediaExtractor_selectTrack(extractor, 0);
codec = AMediaCodec_createDecoderByType(mimeType);
AMediaCodec_configure(codec, format, nullptr, nullptr, 0);
AMediaCodec_start(codec);
// DECODE
bool isExtracting = true;
bool isDecoding = true;
int32_t bytesWritten = 0;
while (isExtracting || isDecoding) {
if (isExtracting) {
// Obtain the index of the next available input buffer
ssize_t inputIndex = AMediaCodec_dequeueInputBuffer(codec, 2000);
//LOGV("Got input buffer %d", inputIndex);
// The input index acts as a status if its negative
if (inputIndex < 0) {
if (inputIndex == AMEDIACODEC_INFO_TRY_AGAIN_LATER) {
// LOGV("Codec.dequeueInputBuffer try again later");
} else {
LOGE("Codec.dequeueInputBuffer unknown error status");
}
} else {
// Obtain the actual buffer and read the encoded data into it
size_t inputSize;
uint8_t *inputBuffer = AMediaCodec_getInputBuffer(codec, inputIndex,
&inputSize);
//LOGV("Sample size is: %d", inputSize);
ssize_t sampleSize = AMediaExtractor_readSampleData(extractor, inputBuffer,
inputSize);
auto presentationTimeUs = AMediaExtractor_getSampleTime(extractor);
if (sampleSize > 0) {
// Enqueue the encoded data
AMediaCodec_queueInputBuffer(codec, inputIndex, 0, sampleSize,
presentationTimeUs,
0);
AMediaExtractor_advance(extractor);
} else {
LOGD("End of extractor data stream");
isExtracting = false;
// We need to tell the codec that we've reached the end of the stream
AMediaCodec_queueInputBuffer(codec, inputIndex, 0, 0,
presentationTimeUs,
AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM);
}
}
}
if (isDecoding) {
// Dequeue the decoded data
AMediaCodecBufferInfo info;
ssize_t outputIndex = AMediaCodec_dequeueOutputBuffer(codec, &info, 0);
if (outputIndex >= 0) {
// Check whether this is set earlier
if (info.flags & AMEDIACODEC_BUFFER_FLAG_END_OF_STREAM) {
LOGD("Reached end of decoding stream");
isDecoding = false;
} else {
// Valid index, acquire buffer
size_t outputSize;
uint8_t *outputBuffer = AMediaCodec_getOutputBuffer(codec, outputIndex,
&outputSize);
/*LOGV("Got output buffer index %d, buffer size: %d, info size: %d writing to pcm index %d",
outputIndex,
outputSize,
info.size,
m_writeIndex);*/
// copy the data out of the buffer
memcpy(targetData + bytesWritten, outputBuffer, info.size);
bytesWritten += info.size;
AMediaCodec_releaseOutputBuffer(codec, outputIndex, false);
}
} else {
// The outputIndex doubles as a status return if its value is < 0
switch (outputIndex) {
case AMEDIACODEC_INFO_TRY_AGAIN_LATER:
LOGD("dequeueOutputBuffer: try again later");
break;
case AMEDIACODEC_INFO_OUTPUT_BUFFERS_CHANGED:
LOGD("dequeueOutputBuffer: output buffers changed");
break;
case AMEDIACODEC_INFO_OUTPUT_FORMAT_CHANGED:
LOGD("dequeueOutputBuffer: output outputFormat changed");
format = AMediaCodec_getOutputFormat(codec);
LOGD("outputFormat changed to: %s", AMediaFormat_toString(format));
break;
}
}
}
}
// Clean up
AMediaFormat_delete(format);
AMediaCodec_delete(codec);
AMediaExtractor_delete(extractor);
return bytesWritten;
}
Now the problem i am facing is that this code it first extracts all the audio data saves it into a buffer which then becomes part of AFileDataSource which i derived from DataSource class in the same sample.
And after its done extracting the whole file it plays by calling the onAudioReady() for Oboe AudioStreamBuilder.
What I need is to play as it streams the chunk of audio buffer.
Optional Query: Also aside from the question it blocks the UI even though i created a foreground service to communicate with the NDK functions to execute this code. Any thoughts on this?
You probably solved this already, but for future readers...
You need a FIFO buffer to store the decoded audio. You can use the Oboe's FIFO buffer e.g. oboe::FifoBuffer.
You can have a low/high watermark for the buffer and a state machine, so you start decoding when the buffer is almost empty and you stop decoding when it's full (you'll figure out the other states that you need).
As a side note, I implemented such player only to find at some later time, that the AAC codec is broken on some devices (Xiaomi and Amazon come to mind), so I had to throw away the AMediaCodec/AMediaExtractor parts and use an AAC library instead.
You have to implement a ringBuffer (or use the one implemented in the oboe example LockFreeQueue.h) and copy the data on buffers that you send on the ringbuffer from the extracting thread. On the other end of the RingBuffer, the audio thread will get that data from the queue and copy it to the audio buffer. This will happen on onAudioReady(oboe::AudioStream *oboeStream, void *audioData, int32_t numFrames) callback that you have to implement in your class (look oboe docs). Be sure to follow all the good practices on the Audio thread (don't allocate/deallocate memory there, no mutexes and no file I/O etc.)
Optional query: A service doesn't run in a separate thread, so obviously if you call it from UI thread it blocks the UI. Look at other types of services, there you can have IntentService or a service with a Messenger that will launch a separate thread on Java, or you can create threads in C++ side using std::thread
I am able to read a video file of h264 format and doing some machine learning inference on top of it. The code works absolutely fine for input from a file. Below code is a sample code from Deepstream SDK
FileDataProvider(const char *szFilePath, simplelogger::Logger *logger)
: logger_(logger)
{
fp_ = fopen(szFilePath, "rb");
//fp_ = fopen("/dev/video0", "rb");
if (nullptr == fp_) {
LOG_ERROR(logger, "Failed to open file " << szFilePath);
exit(1);
}
pLoadBuf_ = new uint8_t[nLoadBuf_];
pPktBuf_ = new uint8_t[nPktBuf_];
assert(nullptr != pLoadBuf_);
}
~FileDataProvider() {
if (fp_) {
fclose(fp_);
}
if (pLoadBuf_) {
delete [] pLoadBuf_;
}
if (pPktBuf_) {
delete [] pPktBuf_;
}
}
What is requirement ?
Read from the Logitech c920 webcam instead for video file.
I know How to read from webcam using opencv. But I don't want to use opencv here.
My Research
Using v4l we can get the stream and display it in vlc.
Camera supports below formats.
#ubox:~$ v4l2-ctl --device=/dev/video1 --list-formats
ioctl: VIDIOC_ENUM_FMT Index : 0 Type : Video Capture
Pixel Format: 'YUYV' Name : YUYV 4:2:2
Index : 1 Type : Video Capture Pixel Format: 'H264'
(compressed) Name : H.264
Index : 2 Type : Video Capture Pixel Format: 'MJPG'
(compressed) Name : Motion-JPEG
Reading output of a USB webcam in Linux
vlc v4l2:///dev/video1 --v4l2-chroma=h264 - this displays the video from the webcam.
How to do this?
- Now how to feed this live stream into
above sample code such that it reads from the webcam rather than file?
[update-1]
- In otherwords, does v4l has some options to write the video stream as h264 formant ? So that, I can read that file like before(above code) when its(v4l) writing to disk.
[update-2]
- we can use ffmpeg instead of v4l. If any solutions for using ffmpeg to save the video stream into disk continuously, so that other programs reads that file ?
Before using ioctl to capture frames from camera, you need to set the format like below first.
fp_ = open("/dev/video0", O_RDWR);
struct v4l2_format fmt = {0};
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_H264;
ioctl(fp_, VIDIOC_S_FMT, &fmt);
then, initialize and map buffer
struct Buffer
{
void *start;
unsigned int length;
unsigned int flags;
};
int buffer_count_ = 4;
Buffer *buffers_;
bool AllocateBuffer()
{
struct v4l2_requestbuffers req = {0};
req.count = buffer_count_;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
if (ioctl(fp_, VIDIOC_REQBUFS, &req) < 0)
{
perror("ioctl Requesting Buffer");
return false;
}
buffers_ = new Buffer[buffer_count_];
for (int i = 0; i < buffer_count_; i++)
{
struct v4l2_buffer buf = {0};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (ioctl(fp_, VIDIOC_QUERYBUF, &buf) < 0)
{
perror("ioctl Querying Buffer");
return false;
}
buffers_[i].start = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, buf.m.offset);
buffers_[i].length = buf.length;
if (MAP_FAILED == buffers_[i].start)
{
printf("MAP FAILED: %d\n", i);
for (int j = 0; j < i; j++)
munmap(buffers_[j].start, buffers_[j].length);
return false;
}
if (ioctl(fp_, VIDIOC_QBUF, &buf) < 0)
{
perror("ioctl Queue Buffer");
return false;
}
}
return true;
}
STREAMON to start capturing
v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
ioctl(fp_, VIDIOC_STREAMON, &type);
finally read a frame from the mapped buffer. Generally, CaptureImage() will be in the while loop.
Buffer CaptureImage()
{
fd_set fds;
FD_ZERO(&fds);
FD_SET(fd_, &fds);
struct timeval tv = {0};
tv.tv_sec = 1;
tv.tv_usec = 0;
int r = select(fd_ + 1, &fds, NULL, NULL, &tv);
if (r == 0)
{
// timeout
}
struct v4l2_buffer buf = {0};
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
while (ioctl(fp_, VIDIOC_DQBUF, &buf) < 0)
{
perror("Retrieving Frame");
}
struct Buffer buffer = {.start = buffers_[buf.index].start,
.length = buf.bytesused,
.flags = buf.flags};
if (ioctl(fp_, VIDIOC_QBUF, &buf) < 0)
{
perror("Queue buffer");
}
return buffer;
}
I'm trying to stream video on a Raspberry Pi using the official V4L2 driver with the Raspberry Pi camera, from C++ on raspbian (2015-02 release), and I'm having low FPS issues.
Currently I'm just creating a window and copying the buffer to the screen (which takes about 30ms) whereas the select() takes about 140ms (for a total of 5-6 fps). I also tried sleeping for 100ms and it decreases the select() time by a similar amount (resulting in the same fps). CPU load is about 5-15%.
I also tried changing the driver fps from console (or system()) but it only works downwards (for example, if I set the driver fps to 1fps, I'll get 1fps but if I set it to 90fps I still get 5-6fps, even though the driver confirms setting it to 90fps).
Also, when querying FPS modes for the used resolution I get 90fps.
I included the parts of the code related to V4L2 (code omitted between different parts) :
//////////////////
// Open device
//////////////////
mFD = open(mDevName, O_RDWR | O_NONBLOCK, 0);
if (mFD == -1) ErrnoExit("Open device failed");
//////////////////
// Setup format
//////////////////
struct v4l2_format fmt;
memset(&fmt, 0, sizeof(fmt));
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
Xioctl(VIDIOC_G_FMT, &fmt);
mImgWidth = fmt.fmt.pix.width;
mImgHeight = fmt.fmt.pix.height;
cout << "width=" << mImgWidth << " height=" << mImgHeight << "\nbytesperline=" << fmt.fmt.pix.bytesperline << " sizeimage=" << fmt.fmt.pix.sizeimage << "\n";
// For some reason querying the format always sets pixelformat to JPEG
// no matter the input, so set it back to YUYV
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
if (Xioctl(VIDIOC_S_FMT, &fmt) == -1)
{
cout << "Set video format failed : " << strerror(errno) << "\n";
}
//////////////////
// Setup streaming
//////////////////
struct v4l2_requestbuffers req;
memset(&req, 0, sizeof(req));
req.count = 20;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
if (-1 == Xioctl(VIDIOC_REQBUFS, &req))
{
ErrnoExit("Reqbufs");
}
if (req.count < 2)
throw "Not enough buffer memory !";
mNBuffers = req.count;
mBuffers = new CBuffer[mNBuffers];
if (!mBuffers) throw "Out of memory !";
for (unsigned int i = 0; i < mNBuffers; i++)
{
struct v4l2_buffer buf;
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (-1 == Xioctl(VIDIOC_QUERYBUF, &buf))
ErrnoExit("Querybuf");
mBuffers[i].mLength = buf.length;
mBuffers[i].pStart = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, mFD, buf.m.offset);
if (mBuffers[i].pStart == MAP_FAILED)
ErrnoExit("mmap");
}
//////////////////
// Start streaming
//////////////////
unsigned int i;
enum v4l2_buf_type type;
struct v4l2_buffer buf;
for (i = 0; i < mNBuffers; i++)
{
memset(&buf, 0, sizeof(buf));
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
if (-1 == Xioctl(VIDIOC_QBUF, &buf))
ErrnoExit("QBUF");
}
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
if (-1==Xioctl(VIDIOC_STREAMON, &type))
ErrnoExit("STREAMON");
And the last two parts in the main loop :
//////////////////
// Get frame
//////////////////
FD_ZERO(&fds);
FD_SET(mFD, &fds);
tv.tv_sec = 3;
tv.tv_usec = 0;
struct timespec t0, t1;
clock_gettime(CLOCK_REALTIME, &t0);
// This line takes about 140ms which I don't get
r = select(mFD + 1, &fds, NULL, NULL, &tv);
clock_gettime(CLOCK_REALTIME, &t1);
cout << "select time : " << ((float)(t1.tv_sec - t0.tv_sec))*1000.0f + ((float)(t1.tv_nsec - t0.tv_nsec))/1000000.0f << "\n";
if (-1 == r)
{
if (EINTR == errno)
continue;
ErrnoExit("select");
}
if (r == 0)
throw "Select timeout\n";
// Read the frame
//~ struct v4l2_buffer buf;
memset(&mCurBuf, 0, sizeof(mCurBuf));
mCurBuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
mCurBuf.memory = V4L2_MEMORY_MMAP;
// DQBUF about 2ms
if (-1 == Xioctl(VIDIOC_DQBUF, &mCurBuf))
{
if (errno == EAGAIN) continue;
ErrnoExit("DQBUF");
}
clock_gettime(CLOCK_REALTIME, &mCaptureTime);
// Manage frame in mBuffers[buf.index]
mCurBufIndex = mCurBuf.index;
break;
}
//////////////////
// Release frame
//////////////////
if (-1 == Xioctl(VIDIOC_QBUF, &mCurBuf))
ErrnoExit("VIDIOC_QBUF during mainloop");
I have been looking at the various methods of using the picamera and I'm hardly an expert, but it would seem that the default camera settings are what's holding you back. There are many modes and switches. I don't know if they are exposed through ioctls or how yet, I just started. But I had to use a program called v4l-ctl to get things ready for the mode I wanted. A deep look at that source and some code lifting should let you achieve greatness. Oh, and I doubt the select call is an issue, it's simply waiting on the descriptor which is slow to become readable. Depending on mode, etc. there can be a mandatory wait for autoexposure, etc.
Edit: I meant to say "a default setting" as you've changed some. There are also rules not codified in the driver.
The pixelformat matters. I encountered the similar low-fps problem, and I spent some time testing using my program in Go and C++ using V4L2 API. What I found is, Rpi Cam Module has good accelaration with H.264/MJPG pixelformat. I can easily get 60fps at 640*480, same as non-compressed formats like YUYV/RGB. However JPEG runs very slow. I can only get 4fps even at 320*240. And I also found the current is higher (>700mA) with JPEG compare to 500mA with H.264/MJPG.
I am decoding an OGG video (theora & vorbis as codecs) and want to show it on the screen (using Ogre 3D) while playing its sound. I can decode the image stream just fine and the video plays perfectly with the correct frame rate, etc.
However, I cannot get the sound to play at all with OpenAL.
Edit: I managed to make the playing sound resemble the actual audio in the video at least somewhat. Updated sample code.
Edit 2: I was able to get "almost" correct sound now. I had to set OpenAL to use AL_FORMAT_STEREO_FLOAT32 (after initializing the extension) instead of just STEREO16. Now the sound is "only" extremely high pitched and stuttering, but at the correct speed.
Here is how I decode audio packets (in a background thread, the equivalent works just fine for the image stream of the video file):
//------------------------------------------------------------------------------
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int bufferSize = av_samples_get_buffer_size(NULL, p_audioCodecContext->channels, p_frame->nb_samples,
p_audioCodecContext->sample_fmt, 0);
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
if (p_frame->channels == 2)
{
memcpy(frame->data, p_frame->data[0], bufferSize >> 1);
memcpy(frame->data + (bufferSize >> 1), p_frame->data[1], bufferSize >> 1);
}
else
{
memcpy(frame->data, p_frame->data, bufferSize);
}
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
So, as you can see, I decode the frame, memcpy it to my own struct, AudioFrame. Now, when the sound is played, I use these audio frame like this:
int numBuffers = 4;
ALuint buffers[4];
alGenBuffers(numBuffers, buffers);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alGenBuffers : " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
// Fill a number of data buffers with audio from the stream
std::vector<AudioFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numReturned = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffers, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
for (unsigned int i = 0; i < numReturned; ++i)
{
alBufferData(buffers[i], _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on alBufferData : " + Ogre::StringConverter::toString(success) + alGetString(success)
+ " size: " + Ogre::StringConverter::toString(audioBufferSizes[i]));
return;
}
}
// Queue the buffers into OpenAL
alSourceQueueBuffers(_source, numReturned, buffers);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error queuing streaming buffers: " + Ogre::StringConverter::toString(success) + alGetString(success));
return;
}
}
alSourcePlay(_source);
The format and frequency I give to OpenAL are AL_FORMAT_STEREO_FLOAT32 (it is a stereo sound stream, and I did initialize the FLOAT32 extension) and 48000 (which is the sample rate of the AVCodecContext of the audio stream).
And during playback, I do the following to refill OpenAL's buffers:
ALint numBuffersProcessed;
// Check if OpenAL is done with any of the queued buffers
alGetSourcei(_source, AL_BUFFERS_PROCESSED, &numBuffersProcessed);
if(numBuffersProcessed <= 0)
return;
// Fill a number of data buffers with audio from the stream
std::vector<AudiFrame*> audioBuffers;
std::vector<unsigned int> audioBufferSizes;
unsigned int numFilled = FFMPEG_PLAYER->getDecodedAudioFrames(numBuffersProcessed, audioBuffers, audioBufferSizes);
// Assign the data buffers to the OpenAL buffers
ALuint buffer;
for (unsigned int i = 0; i < numFilled; ++i)
{
// Pop the oldest queued buffer from the source,
// fill it with the new data, then re-queue it
alSourceUnqueueBuffers(_source, 1, &buffer);
ALenum success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error Unqueuing streaming buffers: " + Ogre::StringConverter::toString(success));
return;
}
alBufferData(buffer, _streamingFormat, audioBuffers[i]->data, audioBufferSizes[i], _streamingFrequency);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error on re- alBufferData: " + Ogre::StringConverter::toString(success));
return;
}
alSourceQueueBuffers(_source, 1, &buffer);
success = alGetError();
if(success != AL_NO_ERROR)
{
CONSOLE_LOG("Error re-queuing streaming buffers: " + Ogre::StringConverter::toString(success) + " "
+ alGetString(success));
return;
}
}
// Make sure the source is still playing,
// and restart it if needed.
ALint playStatus;
alGetSourcei(_source, AL_SOURCE_STATE, &playStatus);
if(playStatus != AL_PLAYING)
alSourcePlay(_source);
As you can see, I do quite heavy error checking. But I do not get any errors, neither from OpenAL nor from FFmpeg.
Edit: What I hear somewhat resembles the actual audio from the video, but VERY high pitched and stuttering VERY much. Also, it seems to be playing on top of TV noise. Very strange. Plus, it is playing much slower than the correct audio would.
Edit: 2 After using AL_FORMAT_STEREO_FLOAT32, the sound plays at the correct speed, but is still very high pitched and stuttering (though less than before).
The video itself is not broken, it can be played fine on any player. OpenAL can also play *.way files just fine in the same application, so it is also working.
Any ideas what could be wrong here or how to do this correctly?
My only guess is that somehow, FFmpeg's decode function does not produce data OpenGL can read. But this is as far as the FFmpeg decode example goes, so I don't know what's missing. As I understand it, the decode_audio4 function decodes the frame to raw data. And OpenAL should be able to work with RAW data (or rather, doesn't work with anything else).
So, I finally figured out how to do it. Gee, what a mess. It was a hint from a user on the libav-users mailing list that put me on the correct path.
Here are my mistakes:
Using the wrong format in the alBufferData function. I used AL_FORMAT_STEREO16 (as that is what every single streaming example with OpenAL uses). I should have used AL_FORMAT_STEREO_FLOAT32, as the video I stream is Ogg and vorbis is stored in floating points. And using swr_convert to convert from AV_SAMPLE_FMT_FLTP to AV_SAMPLE_FMT_S16 just crashes. No idea why.
Not using swr_convert to convert the decoded audio frame to the target format. After I was trying to use swr_convert to convert from FLTP to S16, and it would simply crash without a reason given, I assumed it was broken. But after figuring out my first mistake, I tried again, converting from FLTP to FLT (non-planar) and then it worked! So OpenAL uses interleaved format, not planar. Good to know.
So here is the decodeAudioPacket function that is working for me with Ogg video, vorbis audio stream:
int decodeAudioPacket( AVPacket& p_packet, AVCodecContext* p_audioCodecContext, AVFrame* p_frame,
SwrContext* p_swrContext, uint8_t** p_destBuffer, int p_destLinesize,
FFmpegVideoPlayer* p_player, VideoInfo& p_videoInfo)
{
// Decode audio frame
int got_frame = 0;
int decoded = avcodec_decode_audio4(p_audioCodecContext, p_frame, &got_frame, &p_packet);
if (decoded < 0)
{
p_videoInfo.error = "Error decoding audio frame.";
return decoded;
}
if(decoded <= p_packet.size)
{
/* Move the unread data to the front and clear the end bits */
int remaining = p_packet.size - decoded;
memmove(p_packet.data, &p_packet.data[decoded], remaining);
av_shrink_packet(&p_packet, remaining);
}
// Frame is complete, store it in audio frame queue
if (got_frame)
{
int outputSamples = swr_convert(p_swrContext,
p_destBuffer, p_destLinesize,
(const uint8_t**)p_frame->extended_data, p_frame->nb_samples);
int bufferSize = av_get_bytes_per_sample(AV_SAMPLE_FMT_FLT) * p_videoInfo.audioNumChannels
* outputSamples;
int64_t duration = p_frame->pkt_duration;
int64_t dts = p_frame->pkt_dts;
if (staticOgreLog)
{
staticOgreLog->logMessage("Audio frame bufferSize / duration / dts: "
+ boost::lexical_cast<std::string>(bufferSize) + " / "
+ boost::lexical_cast<std::string>(duration) + " / "
+ boost::lexical_cast<std::string>(dts), Ogre::LML_NORMAL);
}
// Create the audio frame
AudioFrame* frame = new AudioFrame();
frame->dataSize = bufferSize;
frame->data = new uint8_t[bufferSize];
memcpy(frame->data, p_destBuffer[0], bufferSize);
double timeBase = ((double)p_audioCodecContext->time_base.num) / (double)p_audioCodecContext->time_base.den;
frame->lifeTime = duration * timeBase;
p_player->addAudioFrame(frame);
}
return decoded;
}
And here is how I initialize the context and the destination buffer:
// Initialize SWR context
SwrContext* swrContext = swr_alloc_set_opts(NULL,
audioCodecContext->channel_layout, AV_SAMPLE_FMT_FLT, audioCodecContext->sample_rate,
audioCodecContext->channel_layout, audioCodecContext->sample_fmt, audioCodecContext->sample_rate,
0, NULL);
int result = swr_init(swrContext);
// Create destination sample buffer
uint8_t** destBuffer = NULL;
int destBufferLinesize;
av_samples_alloc_array_and_samples( &destBuffer,
&destBufferLinesize,
videoInfo.audioNumChannels,
2048,
AV_SAMPLE_FMT_FLT,
0);