Unix file descriptor - c++

Today I found very interesting behavior of file descriptors in Linux. Look at that code:
#include <dirent.h> /* Defines DT_* constants */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <errno.h>
/*
 * trace: alias for printf, used for all diagnostic output in this program.
 *
 * handle_error: print msg on a line of its own and terminate the process with
 * a failure status. msg is printed through "%s" so that a '%' inside it is
 * never interpreted as a conversion, and the non-zero exit status lets the
 * caller of the program see that the run did not succeed.
 */
#define trace printf
#define handle_error(msg) \
    do { trace("%s\n", (msg)); exit(EXIT_FAILURE); } while (0)
/*
 * Open the file called name, creating it when the first open fails.
 *
 * Returns the descriptor obtained from open(), or a negative value when both
 * attempts fail (the errno of the second attempt is traced).
 */
int createFile(const char* name) {
    int fd = ::open(name, 0);
    if (fd >= 0)
        return fd;

    /* First attempt failed: report it and retry with O_CREAT. */
    trace("create file : %s\n", name);
    fd = ::open(name, O_CREAT, 0666);
    if (fd < 0)
        trace("error r < 0 %d\n", errno);
    return fd;
}
/*
 * Create the directory called name (mode 0777) and open it.
 *
 * A failing mkdir() is only traced; the directory is opened regardless.
 * Returns the descriptor from open(), or a negative value when it fails.
 */
int createDir(const char* name) {
    if (::mkdir(name, 0777) != 0) {
        trace("error r!=0\n");
    }

    const int fd = open(name, 0);
    if (fd < 0) {
        trace("error create dir r <0\n");
    }
    return fd;
}
/*
 * Layout used by test123() to interpret the records that the raw getdents
 * system call writes into its buffer.
 */
struct linux_dirent {
/* Printed in the i-node column. */
long d_ino;
/* Printed in the d_off column. */
off_t d_off;
/* Size of this record in bytes; added to the buffer position to reach the next record. */
unsigned short d_reclen;
/* Entry name, printed with %s. */
char d_name[];
};
#include <sys/types.h>
#include <dirent.h>
/*
 * Dump every entry of the directory open on fd, using the raw getdents
 * system call.
 *
 * For each batch returned by the kernel a header is printed, followed by one
 * line per record with its i-node number, record length, d_off and name.
 * (The header mentions a "file type" column, but no type is printed.)
 *
 * Terminates the process through handle_error() when fd is -1 or when
 * getdents fails.
 */
void test123(int fd) {
    char buf[1024];

    if (fd == -1) {
        handle_error("open");
    }

    for ( ; ; ) {
        /* syscall() returns long; keep the full width until it is printed. */
        long nread = syscall(SYS_getdents, fd, buf, sizeof buf);
        if (nread == -1) {
            handle_error("getdents");
        }
        if (nread == 0) {
            break;  /* end of directory */
        }

        trace("--------------- nread=%d ---------------\n", (int) nread);
        trace("i-node# file type d_reclen d_off d_name\n");

        /* Records are variable-sized: d_reclen gives the distance to the next one. */
        for (long bpos = 0; bpos < nread;) {
            struct linux_dirent *d = (struct linux_dirent *) (buf + bpos);
            trace("%8ld ", d->d_ino);
            trace("%4d %10lld %s\n", d->d_reclen,
                  (long long) d->d_off, d->d_name);
            bpos += d->d_reclen;
        }
    }
}
/*
 * Create directory "test", create "test/file.gg" inside it, then list the
 * directory through the descriptor that was opened before the file existed.
 */
int main(int argc, const char * argv[]) {
    (void) argc;
    (void) argv;

    const int dirFd = createDir("test");
    const int fileFd = createFile("test/file.gg");

    test123(dirFd);

    close(dirFd);
    close(fileFd);
    return 0;
}
In this code I create a folder, save its file descriptor, create a file in that folder, and then try to print all files in that directory through the directory's file descriptor. However, I get this output:
create file : test/file.gg
--------------- nread=32 ---------------
i-node# file type d_reclen d_off d_name
48879 16 1 .
48880 16 2 ..
There is no file.gg in that folder. So my question is: how can this happen, and how do I work correctly with file descriptors? As I understand it, a file descriptor is just an index into a per-process table of all open files and directories. But it looks like the directory descriptor somehow caches the files in that folder.
How to work correctly with descriptors in my case?

Try calling fsync on your directory. You should open the directory with the O_RDONLY flag; O_WRONLY will fail. Creating a file and syncing it may not sync the directory metadata for that file. More information in this article

Related

multithreaded file copying

I wrote C++ code that copies a file to another directory on Linux using multiple threads, but it doesn't work (it just creates an empty file in the directory).
I don't know what the problem is. I think my thread does not have the right access to write to the shared file, but I don't know what I should do.
It should work when typed in terminal :
$./a.out <file name> <dir> <thread number (default 4)>
This is my code:
/*Multithreads file copier*/
#include <fcntl.h>
#include <pthread.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <sys/sendfile.h>
#include <unistd.h>
#include <cstdio>
// Paths of the source file and of the destination, shared with the worker threads.
char* file;
char* fileout_path;
// File descriptors for the source and the destination.
int fin, fout;
// Describes the slice of the file that one worker thread copies.
struct PART{
// Handed to sendfile() as its offset argument; null unless the creator sets it.
off_t* offset =0;
// Number of bytes in the slice (passed to sendfile() as the byte count).
size_t size;
};
// Worker thread entry point: copies one slice of the source file `file` into
// the destination `fileout_path`.
//
// data points to a struct PART. When PART::offset is non-null the slice starts
// at *offset in both files, and sendfile() advances *offset as it copies; when
// it is null the slice starts at the beginning of both files.
// Always returns NULL; failures are reported on stderr.
void *Copy (void * data)
{
    struct PART *mypart = (struct PART *) data;

    // Each thread opens its own descriptors: a descriptor carries its own file
    // position, so workers cannot disturb each other's seeks.
    int in_fd = open(file, O_RDONLY);
    if (in_fd < 0) {
        perror("open input");
        return NULL;
    }
    int out_fd = open(fileout_path, O_WRONLY|O_CREAT, 0644);
    if (out_fd < 0) {
        perror("open output");
        close(in_fd);
        return NULL;
    }

    // Position the destination at the start of this slice (not at its size).
    off_t start = (mypart->offset != NULL) ? *mypart->offset : 0;
    if (lseek(out_fd, start, SEEK_SET) == (off_t) -1) {
        perror("lseek");
    } else {
        size_t remaining = mypart->size;
        while (remaining > 0) {
            // sendfile() takes the destination first, then the source, and may
            // transfer fewer bytes than requested, hence the loop.
            ssize_t sent = sendfile(out_fd, in_fd, mypart->offset, remaining);
            if (sent < 0) {
                perror("sendfile");
                break;
            }
            if (sent == 0) {
                break; // source ended before the slice was complete
            }
            remaining -= (size_t) sent;
        }
    }

    printf("threading....\n");//to know the thread ran
    close(out_fd);
    close(in_fd);
    return NULL;
}
// Usage: a.out <file name> <output path> [thread number (default 4)]
//
// Splits the source file into equal slices (the last slice also takes the
// remainder), starts one Copy thread per slice and waits for all of them.
// NOTE: the output path is opened as a regular file, so it has to name the
// destination file itself rather than a directory.
// Returns 0 on success and 1 on a usage, stat, open or thread-creation error.
int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        fprintf(stderr, "usage: %s <file name> <output path> [thread number (default 4)]\n", argv[0]);
        return 1;
    }

    int threads_number = 4;//default thread number
    if (argc > 3)
    {
        threads_number = atoi(argv[3]);
        if (threads_number <= 0)
        {
            threads_number = 4;
        }
    }

    file = argv[1];
    fileout_path = argv[2];

    // The size must be known before the slices can be computed.
    struct stat f_stat;
    if (stat(file, &f_stat) != 0)
    {
        perror("stat");
        return 1;
    }

    // Create the destination up front so every worker finds it.
    int created = open(fileout_path, O_WRONLY|O_CREAT, 0644);
    if (created < 0)
    {
        perror("open output");
        return 1;
    }
    close(created);

    //multithreading datatypes
    pthread_t tid[threads_number];
    struct PART part[threads_number];
    off_t offsets[threads_number]; // backing storage for PART::offset

    //allocation size and start of each part
    off_t part_size = f_stat.st_size / threads_number;
    for (int i = 0; i < threads_number; i++)
    {
        offsets[i] = part_size * i;
        part[i].offset = &offsets[i];
        if (i == threads_number - 1)
        {
            // last slice absorbs the remainder of the division
            part[i].size = (size_t) (f_stat.st_size - offsets[i]);
        }
        else
        {
            part[i].size = (size_t) part_size;
        }
    }

    // Start every worker first, then join: joining inside the creation loop
    // would run the slices one after another.
    int started = 0;
    while (started < threads_number)
    {
        if (pthread_create(&tid[started], NULL, Copy, (void *)&part[started]) != 0)
        {
            fprintf(stderr, "could not create thread %d\n", started);
            break;
        }
        started++;
    }
    for (int j = 0; j < started; j++)
    {
        pthread_join(tid[j], NULL);
    }

    printf("thread is done.\n");
    return (started == threads_number) ? 0 : 1;
}

Linux - Get window title from pid

I am making an app blocker with c++ and I need it to block urls. To block urls it will get the window name of the current tab that it is focused on. Here is the code I've got so far:
#include <iostream>
#include <glob.h>
#include <cstdio>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
using namespace std;
/*
 * Look up a process by its command name.
 *
 * Scans every /proc/<entry>/comm file and returns the PID of the first
 * process whose comm line equals process_name. Entries whose directory name
 * is not a number (such as /proc/self) are skipped.
 *
 * Returns -1 when process_name is NULL, when the glob fails, when memory
 * cannot be allocated, or when no process matches.
 */
pid_t find_pid(const char *process_name)
{
    pid_t pid = -1;
    glob_t pglob;
    char *procname;
    char *readbuf;
    size_t name_len;
    size_t buflen;
    size_t i;

    if (process_name == NULL)
        return pid;

    /* Get a list of all comm files. man 5 proc */
    if (glob("/proc/*/comm", 0, NULL, &pglob) != 0)
        return pid;

    /* The comm files include trailing newlines, so compare against "name\n". */
    name_len = strlen(process_name);
    buflen = name_len + 2;
    procname = (char *) malloc(buflen);
    /* readbuf will hold the contents of the comm files. */
    readbuf = (char *) malloc(buflen);

    if (procname != NULL && readbuf != NULL) {
        memcpy(procname, process_name, name_len);
        procname[name_len] = '\n';
        procname[name_len + 1] = '\0';

        for (i = 0; i < pglob.gl_pathc; ++i) {
            const char *pid_text;
            char *end;
            char *ret;
            long value;
            FILE *comm = fopen(pglob.gl_pathv[i], "r");

            /* The process may have exited since the glob; just move on. */
            if (comm == NULL)
                continue;
            ret = fgets(readbuf, (int) buflen, comm);
            fclose(comm);
            if (ret == NULL || strcmp(readbuf, procname) != 0)
                continue;

            /* Extract the process ID from the path; it must be purely numeric. */
            pid_text = pglob.gl_pathv[i] + strlen("/proc/");
            value = strtol(pid_text, &end, 10);
            if (end == pid_text || *end != '/')
                continue;

            pid = (pid_t) value;
            break;
        }
    }

    /* Clean up. */
    free(procname);
    free(readbuf);
    globfree(&pglob);
    return pid;
}
// Print the PID found for the process named "brave".
int main(int argc, char const *argv[])
{
    (void) argc;
    (void) argv;

    const pid_t bravePid = find_pid("brave");
    cout << bravePid << endl;
    return 0;
}
I copied and modified code from this post - linux - get pid of process. Please help me.

inotify_add_watch fails with no such file or directory

I am trying to watch for the creation of file in my c/c++ program. I am trying to use inotify for this purpose. However, I am getting a no such file or directory when I make the inotify_add_watch() call in my code. I am running my program on an Ubuntu 16.04 machine. The machine is running in the EC2 cloud. Can someone tell me the possible reasons for receiving a no such file or directory error?
According to the man page for inotify_add_watch, that's not even one of the possible error codes. I've made sure I have proper read permissions for the file I am trying to monitor, etc.
Here's my test program:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/inotify.h>
#include <limits.h>
/* Number of events the read buffer is sized for. */
#define MAX_EVENTS 1024
/* Bytes reserved per event for the name that follows the event header. */
#define LEN_NAME 16
/* Size of the fixed inotify_event header. */
#define EVENT_SIZE (sizeof (struct inotify_event))
/* Total size in bytes of the buffer handed to read(). */
#define BUF_LEN (MAX_EVENTS * (EVENT_SIZE + LEN_NAME))
/*
 * Watch the path given in argv[1] and print create / modify / delete events
 * until reading from the inotify descriptor fails.
 *
 * Returns -1 on a usage or setup error, EXIT_FAILURE when read() fails, and
 * 0 otherwise.
 */
int
main(int argc, char **argv)
{
    int length, i, wd;
    int fd;
    int status = 0;
    char buffer[BUF_LEN];

    if (argc < 2) {
        fprintf(stderr, "usage: %s <path>\n", argv[0]);
        return -1;
    }

    /* Initialize Inotify*/
    fd = inotify_init();
    if (fd < 0) {
        perror("Couldn't initialize inotify");
        return -1;
    }

    /* add watch to starting directory */
    wd = inotify_add_watch(fd, argv[1], IN_CREATE | IN_MODIFY | IN_DELETE);
    if (wd == -1) {
        printf("Couldn't add watch to %s. errno=%d\n", argv[1], errno);
        close(fd);
        return -1;
    }
    printf("Watching:: %s\n", argv[1]);

    /* do it until read() fails */
    for (;;) {
        length = read(fd, buffer, BUF_LEN);
        if (length < 0) {
            if (errno == EINTR) {
                continue;  /* interrupted by a signal: just retry */
            }
            perror("read");
            status = EXIT_FAILURE;
            break;
        }

        i = 0;
        while (i < length) {
            struct inotify_event *event = (struct inotify_event *) &buffer[i];
            if (event->len) {
                if (event->mask & IN_CREATE) {
                    printf("Create event. file=%s, wf=%d\n", event->name, event->wd);
                }
                if (event->mask & IN_MODIFY) {
                    printf("Modify event. file=%s, wf=%d\n", event->name, event->wd);
                }
                if (event->mask & IN_DELETE) {
                    printf("Delete event. file=%s, wf=%d\n", event->name, event->wd);
                }
            }
            /*
             * Always step past the event, including events without a name;
             * otherwise a nameless event would be examined forever.
             */
            i += (int) (EVENT_SIZE + event->len);
        }
    }

    /* Clean up*/
    inotify_rm_watch(fd, wd);
    close(fd);
    return status;
}
If you want to monitor the creation of a file or directory, you should watch its parent directory, since the new file or directory does not exist yet when you call inotify_add_watch().
Then, when any file or directory is created in the watched directory, you will get an event, and the name of the new file or directory will be in event->name.

Enter all subfolders - Recursive

How can I write a program that enters all of a folder's subfolders?
I wrote some code, but it does not enter the subfolders.
/*
 * Print the name of every entry in the directory given as argv[1], one per
 * line. Sub-directories are listed but not entered.
 *
 * Returns 0 on success and 1 when no directory was given or it cannot be
 * opened.
 */
int main(int argc, char *argv[])
{
    const char *dirPath;
    DIR *dir;
    struct dirent *ent;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <directory>\n", argv[0]);
        return 1;
    }
    dirPath = argv[1];

    dir = opendir(dirPath);
    if (dir == NULL) {
        perror("Problem");
        return 1;
    }

    while ((ent = readdir(dir)) != NULL) {
        /* The format string comes first; the entry name is only data. */
        printf("%s\n", ent->d_name);
    }

    closedir(dir);
    return 0;
}
I tried using the stat() function recursively.
http://www.lemoda.net/c/recursive-directory/
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <errno.h>
/* "readdir" etc. are defined here. */
#include <dirent.h>
/* limits.h defines "PATH_MAX". */
#include <limits.h>
/*
 * List the files in "dir_name", descending into every sub-directory.
 *
 * Each entry is printed as "dir_name/entry"; a sub-directory's path is
 * printed once more just before it is entered. The process exits with
 * EXIT_FAILURE when a directory cannot be opened or closed, or when a path
 * does not fit in PATH_MAX bytes.
 */
static void
list_dir (const char * dir_name)
{
    DIR * d;

    /* Open the directory specified by "dir_name". */
    d = opendir (dir_name);
    /* Check it was opened. */
    if (! d) {
        fprintf (stderr, "Cannot open directory '%s': %s\n",
                 dir_name, strerror (errno));
        exit (EXIT_FAILURE);
    }

    while (1) {
        struct dirent * entry;
        const char * d_name;

        /* "Readdir" gets subsequent entries from "d". */
        entry = readdir (d);
        if (! entry) {
            /* There are no more entries in this directory, so break
               out of the while loop. */
            break;
        }
        d_name = entry->d_name;

        /* Print the name of the file and directory. (To leave directories
           out, print only when entry->d_type != DT_DIR.) */
        printf ("%s/%s\n", dir_name, d_name);

        /* "d_type" holds one DT_* value, not a set of flags, so it has to
           be compared for equality: a bitwise test against DT_DIR also
           matches other types such as block devices. */
        if (entry->d_type != DT_DIR) {
            continue;
        }
        /* Check that the directory is not "d" or d's parent. */
        if (strcmp (d_name, "..") == 0 || strcmp (d_name, ".") == 0) {
            continue;
        }

        {
            int path_length;
            char path[PATH_MAX];

            path_length = snprintf (path, PATH_MAX,
                                    "%s/%s", dir_name, d_name);
            /* Reject a failed or truncated path before it is used. */
            if (path_length < 0 || path_length >= PATH_MAX) {
                fprintf (stderr, "Path length has got too long.\n");
                exit (EXIT_FAILURE);
            }
            printf ("%s\n", path);
            /* Recursively call "list_dir" with the new path. */
            list_dir (path);
        }
    }

    /* After going through all the entries, close the directory. */
    if (closedir (d)) {
        fprintf (stderr, "Could not close '%s': %s\n",
                 dir_name, strerror (errno));
        exit (EXIT_FAILURE);
    }
}
/* Recursively list the fixed directory "/usr/share/games". */
int main (void)
{
    static const char start_dir[] = "/usr/share/games";

    list_dir (start_dir);
    return 0;
}
Another example, using file tree walk (ftw or nftw)
These have the advantage of calling your own callback function with a struct stat, a filename and a type (FTW_D, etc.). Call fnmatch to eliminate unwanted entries. Files that start with a "." are "hidden files": ls does not show them by default, but ls -a will list them.
#include <sys/types.h>
#include <stdlib.h>
#include <stdio.h>
#include <ftw.h>
/*
 * Tree-walk callback: print the visited path on its own line.
 *
 * The stat buffer, the type flag and the FTW info are not used here; they
 * are the place to hook in fnmatch() or type-based filtering.
 * Always returns 0.
 */
int callback(const char *fname,
             const struct stat *st,
             int type,
             struct FTW *ftw)
{
    (void) st;
    (void) type;
    (void) ftw;

    puts(fname);
    return 0;
}
/*
 * Walk the tree rooted at argv[1], or at "." when the program is run without
 * arguments, printing every path through callback().
 * Returns whatever nftw() returns.
 */
int main(int argc, char **argv)
{
    const int max_open_fds = 8; /* max file descriptors nftw may keep open */
    const char *root = ".";

    if (argc != 1) {
        root = argv[1];
    }

    /* NOTE(review): FTW_ANYERR is not among the nftw flags listed by POSIX;
       confirm it is defined on the target platform. */
    return nftw(root, callback, max_open_fds, FTW_ANYERR);
}

Why is MD5Sum so fast

I've been studying hashing in C/C++ and tried to replicate the md5sum command in Linux. After analysing the source code, it seems that md5sum relies on the md5 library's md5_stream. I've approximated the md5_stream function from the md5.h library into the code below, and it runs in ~13-14 seconds. I've tried to call the md5_stream function directly and got ~13-14 seconds. The md5sum runs in 4 seconds. What have the GNU people done to get the speed out of the code?
The md5.h/md5.c code is available in the CoreUtils source code.
#include <QtCore/QCoreApplication>
#include <QtCore/QDebug>
#include <iostream>
#include <iomanip>
#include <fstream>
#include "md5.h"
/* Read-buffer size. It must be a multiple of 64 because full buffers are
   passed to md5_process_block(), which works on whole 64-byte blocks. */
#define BLOCKSIZE 32768
// Compute the MD5 digest of /dev/sdb and print it as 16 space-separated hex
// bytes. Returns 0 on success and 1 when the device cannot be opened, the
// buffer cannot be allocated, or a read error occurs.
int main()
{
    FILE *fpinput = fopen("/dev/sdb", "rb");
    if (fpinput == NULL) {
        std::cerr << "cannot open input file /dev/sdb" << std::endl;
        return 1;
    }

    char *buffer = (char*)malloc (BLOCKSIZE + 72);
    if (!buffer) {
        fclose (fpinput);
        return 1;
    }

    struct md5_ctx ctx;
    unsigned char resblock[16];   // an MD5 digest is 16 bytes
    md5_init_ctx (&ctx);

    size_t sum = 0;               // bytes currently waiting in buffer
    bool read_failed = false;
    for (;;) {
        size_t n = fread (buffer + sum, 1, BLOCKSIZE - sum, fpinput);
        if (n == 0) {
            // Either end of input or an error; ferror() tells them apart.
            read_failed = ferror (fpinput) != 0;
            break;
        }
        sum += n;
        if (sum == BLOCKSIZE) {
            // md5_process_bytes accepts any length, so hashing stays correct
            // even if BLOCKSIZE is not a multiple of the 64-byte MD5 block.
            md5_process_bytes (buffer, BLOCKSIZE, &ctx);
            sum = 0;
        }
    }
    fclose (fpinput);

    if (read_failed) {
        free (buffer);
        return 1;
    }

    /* Process any remaining bytes. */
    if (sum > 0) {
        md5_process_bytes (buffer, sum, &ctx);
    }
    /* Construct result in desired memory. */
    md5_finish_ctx (&ctx, resblock);
    free (buffer);

    for (int x = 0; x < 16; ++x) {
        // Widen to an integer so the byte is printed as a number, not a char.
        std::cout << std::setfill('0') << std::setw(2) << std::hex
                  << static_cast<unsigned int>(resblock[x]);
        std::cout << " ";
    }
    std::cout << std::endl;
    return 0;
}
EDIT: Was a default mkspec problem in Fedora 19 64-bit.
fread() is convenient, but don't use fread() if you care about performance. fread() will copy from the OS to a libc buffer, then to your buffer. This extra copying cost CPU cycles and cache.
For better performance use open() then read() to avoid the extra copy. Make sure your read() calls are multiples of the block size, but lower than your CPU cache size.
For best performance use mmap() map the disk directly to RAM.
If you try something like the below code, it should go faster.
// compile gcc mmap_md5.c -lgcrypt
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <gcrypt.h>
#include <linux/fs.h> // ioctl
/* Report the failed call named by msg through perror() and terminate the
   process with EXIT_FAILURE. */
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
/*
 * Usage: prog file offset [length]
 *
 * Maps the requested range of a regular file or block device into memory and
 * prints its MD5 digest. Without a length, or when offset + length would
 * reach past the end, the range runs to the end of the file or device.
 *
 * Exits with EXIT_FAILURE on a usage error, an invalid offset or length, or
 * when any system call fails.
 */
int main(int argc, char *argv[])
{
    char *addr;
    char *end;
    int fd;
    struct stat sb;
    off_t offset, pa_offset, total;
    size_t length, map_length;
    long long parsed;
    unsigned char digest[16];
    char digest_ascii[32+1] = {0,};
    unsigned int digest_length;
    unsigned int i;

    if (argc < 3 || argc > 4) {
        fprintf(stderr, "%s file offset [length]\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    /* libgcrypt has to be initialised before any other call into it. */
    if (!gcry_check_version(NULL)) {
        fprintf(stderr, "libgcrypt initialisation failed\n");
        exit(EXIT_FAILURE);
    }
    digest_length = gcry_md_get_algo_dlen(GCRY_MD_MD5);
    if (digest_length > sizeof digest) {
        fprintf(stderr, "unexpected digest length %u\n", digest_length);
        exit(EXIT_FAILURE);
    }

    fd = open(argv[1], O_RDONLY);
    if (fd == -1)
        handle_error("open");
    if (fstat(fd, &sb) == -1) /* To obtain file size */
        handle_error("fstat");

    parsed = strtoll(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || parsed < 0) {
        fprintf(stderr, "invalid offset '%s'\n", argv[2]);
        exit(EXIT_FAILURE);
    }
    offset = (off_t) parsed;
    /* offset for mmap() must be page aligned */
    pa_offset = offset & ~((off_t) sysconf(_SC_PAGE_SIZE) - 1);

    /* S_ISBLK tests the file type; OR-ing the mode with S_IFBLK is always
       non-zero and would send regular files down the ioctl path too. */
    if (S_ISBLK(sb.st_mode)) {
        /* block device: st_size is not meaningful, ask the driver instead */
        unsigned long long dev_size = 0;
        if (ioctl(fd, BLKGETSIZE64, &dev_size) == -1)
            handle_error("ioctl");
        total = (off_t) dev_size;
    } else {
        total = sb.st_size;
    }

    if (offset >= total) {
        fprintf(stderr, "offset is past end of file size=%lld, offset=%lld\n",
                (long long) total, (long long) offset);
        exit(EXIT_FAILURE);
    }

    /* Default: hash from offset to the end of the file or device. */
    length = (size_t) (total - offset);
    if (argc == 4) {
        parsed = strtoll(argv[3], &end, 10);
        if (end == argv[3] || *end != '\0' || parsed < 0) {
            fprintf(stderr, "invalid length '%s'\n", argv[3]);
            exit(EXIT_FAILURE);
        }
        /* Can't hash bytes past end of file, so only ever shrink the range. */
        if ((unsigned long long) parsed < length)
            length = (size_t) parsed;
    }
    printf("length= %zu\n", length);

    map_length = length + (size_t) (offset - pa_offset);
    addr = mmap(NULL, map_length, PROT_READ,
                MAP_PRIVATE, fd, pa_offset);
    if (addr == MAP_FAILED)
        handle_error("mmap");

    gcry_md_hash_buffer(GCRY_MD_MD5, digest, addr + offset - pa_offset, length);
    for (i = 0; i < digest_length; i++) {
        /* two hex digits plus the terminating NUL per step */
        snprintf(digest_ascii + (i * 2), 3, "%02x", digest[i]);
    }
    printf("hash=%s\n", digest_ascii);

    munmap(addr, map_length);
    close(fd);
    exit(EXIT_SUCCESS);
}
It turned out to be an error in the Qt mkspecs regarding an optimization flag not being set properly.