Read from a file that is continuously being updated

2020-02-12 07:24发布

问题:

I am writing some C code to process some data in a file, but I just learned that the file is going to be constantly appended to (about once per second, maybe faster). So I'm wondering how I can keep reading from the file as it's being added to. When I get to the end, I want to wait until the next line is added and then process it, then wait again and process the next one, and so on. I have something like:

// Sketch from the question: read one line per iteration, forever.
while(1){
    // NOTE(review): the value returned by fgets() is discarded here; that
    // return value is how fgets() reports end-of-file or an error.
    fgets(line, sizeof(line), file);
    // NOTE(review): sizeof(line) suggests `line` is an array; if so, this
    // comparison can never be true. Confirm against the declaration.
    while(line == NULL){
       //wait ?  then try to read again?
    }
    //tokenize line and do my stuff here
}

I thought I could maybe use inotify, but I am getting nowhere with that. Does anyone have any advice?

回答1:

The most efficient way is using inotify, and the direct way is using the read() system call directly.

using inotify

The following code may give you some help. It works well on Debian 7.0 with GCC 4.7:

/* Sample program that reports inotify events (create, delete, access, modify, open) on "/tmp/test_inotify". */
// Modified from: http://www.thegeekstuff.com/2010/04/inotify-c-program-example/
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/inotify.h>

#define EVENT_SIZE  ( sizeof (struct inotify_event) )
#define EVENT_BUF_LEN     ( 1024 * ( EVENT_SIZE + 16 ) )

/*
 * Watch "/tmp/test_inotify" and print a message for every event contained
 * in the first batch the kernel delivers, then remove the watch and exit.
 *
 * Returns EXIT_SUCCESS when a batch of events was read and reported, and
 * EXIT_FAILURE when the inotify instance could not be created, the watch
 * could not be added, or the read failed.
 */
int main( void )
{
  /* inotify(7): the buffer handed to read() must be suitably aligned for
   * struct inotify_event, because events are accessed in place below.
   */
  char buffer[EVENT_BUF_LEN]
      __attribute__ ((aligned(__alignof__(struct inotify_event))));
  ssize_t length;
  int fd;
  int wd;
  int status = EXIT_FAILURE;

  /* creating the INOTIFY instance */
  fd = inotify_init();
  if ( fd < 0 ) {
    perror( "inotify_init error" );
    return EXIT_FAILURE;
  }

  /* adding "/tmp/test_inotify" to the watch list; this fails (returns -1)
   * when the path does not exist, so the result has to be checked.
   */
  wd = inotify_add_watch( fd, "/tmp/test_inotify", IN_CREATE | IN_DELETE | IN_ACCESS | IN_MODIFY | IN_OPEN );
  if ( wd < 0 ) {
    perror( "inotify_add_watch" );
    close( fd );
    return EXIT_FAILURE;
  }

  /* this read blocks until at least one event is available and may return
   * several events at once.
   */
  length = read( fd, buffer, EVENT_BUF_LEN );
  if ( length < 0 ) {
    perror( "read" );
  } else {
    /* walk the variable-length event records one by one */
    char *cursor = buffer;
    char *end = buffer + length;

    while ( cursor < end ) {
      struct inotify_event *event = ( struct inotify_event * ) cursor;

      if ( event->len == 0 ) {
        // For a single file watching, the event->name is empty, and event->len = 0
        printf(" Single file watching event happened\n");
      } else {
        if ( event->mask & IN_CREATE ) {
          if ( event->mask & IN_ISDIR ) {
            printf( "New directory %s created.\n", event->name );
          } else {
            printf( "New file %s created.\n", event->name );
          }
        } else if ( event->mask & IN_DELETE ) {
          if ( event->mask & IN_ISDIR ) {
            printf( "Directory %s deleted.\n", event->name );
          } else {
            printf( "File %s deleted.\n", event->name );
          }
        } else if ( event->mask & IN_ACCESS ) {
          if ( event->mask & IN_ISDIR ) {
            printf( "Directory %s accessed.\n", event->name );
          } else {
            printf(" File %s accessed. \n", event->name );
          }
        } else if ( event->mask & IN_MODIFY ) {
          if ( event->mask & IN_ISDIR ) {
            printf( "Directory %s modified.\n", event->name );
          } else {
            printf(" File %s modified. \n", event->name );
          }
        } else if ( event->mask & IN_OPEN ) {
          if ( event->mask & IN_ISDIR ) {
            printf( "Directory %s opened.\n", event->name );
          } else {
            printf(" File %s opened. \n", event->name );
          }
        } else {
          printf( "Directory or File is accessed by other mode\n");
        }
      }

      /* each record is the fixed header followed by event->len name bytes */
      cursor += EVENT_SIZE + event->len;
    }
    status = EXIT_SUCCESS;
  }

  /* removing "/tmp/test_inotify" from the watch list */
  inotify_rm_watch( fd, wd );

  /* closing the INOTIFY instance */
  close( fd );

  return status;
}

When running the above program, you can test it by creating a file or directory named /tmp/test_inotify.

A detailed explanation could be found here

Use read system call

If a file is open and you have read up to its current end, the read() system call returns 0. If a writer later appends N bytes to the file, the next read() returns min(N, buffersize).

So it works correctly for your circumstance. The following is an example of the code.

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

typedef int FD ;

/*
 * Follow "/tmp/test_inotify" by polling it with read().
 *
 * Every chunk that read() returns is NUL-terminated and passed to
 * split_buffer_by_newline(), which is not defined in this snippet and has
 * to be supplied by the reader. A chunk can end in the middle of a line,
 * so that helper must carry a trailing partial line over to the next call.
 *
 * The loop sleeps only when read() returns 0 (nothing new yet), so a burst
 * of appended data is drained without a one-second pause per chunk.
 * The program exits with EXIT_FAILURE if the file cannot be opened or a
 * read fails; otherwise it runs until it is killed.
 */
int main() {
  char buf[128];
  /* The file is only read here, so read-only access is sufficient. */
  FD filed = open("/tmp/test_inotify", O_RDONLY);

  /* open() reports failure with -1; 0 is a valid descriptor. */
  if( filed < 0 ) {
    perror("Openfile error");
    exit(EXIT_FAILURE);
  }

  while(1) {
    /* Leave one byte free for the terminating NUL added below. */
    ssize_t nbytes = read(filed, buf, sizeof(buf) - 1);
    printf("read %d bytes from file.\n", (int)nbytes);

    if(nbytes < 0) {
      perror("read");
      close(filed);
      exit(EXIT_FAILURE);
    }

    if(nbytes > 0) {
      buf[nbytes] = '\0';
      split_buffer_by_newline(buf); // split buffer by new line.
      continue;                     // more data may already be waiting
    }

    /* nbytes == 0: at the current end of file, wait for the writer. */
    sleep(1);
  }
  return 0; /* not reached */
}

Reference

  • Thanks to Jonathan Leffler's Comment
  • http://www.thegeekstuff.com/2010/04/inotify-c-program-example/


回答2:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Follow "file.txt": print every line appended to it, forever.
 *
 * The stream is opened once. When fgetc() reports end-of-file, the
 * end-of-file indicator is reset with clearerr() and reading is retried
 * after a pause, so the current position never has to be tracked or
 * restored by hand. A line that is only partly written when end-of-file
 * is reached stays in the buffer and is completed on a later pass.
 * Lines longer than the buffer are printed in buffer-sized pieces rather
 * than overflowing it.
 *
 * Returns EXIT_FAILURE if the file cannot be opened or a read error
 * occurs; otherwise it runs until it is killed.
 */
int
main(void)
{
    enum { LINE_BUF_SIZE = 128, POLL_SECONDS = 10 };

    char            str[LINE_BUF_SIZE];
    size_t          j = 0;
    FILE           *fp = fopen("file.txt", "r");

    if (fp == NULL) {
        perror("file.txt");
        return EXIT_FAILURE;
    }

    while (1) {
        /* int, not char: EOF must stay distinguishable from every byte value */
        int             ch;

        while ((ch = fgetc(fp)) != EOF) {
            str[j++] = (char) ch;
            /* flush on end of line, or when only the NUL slot is left */
            if (ch == '\n' || j == sizeof(str) - 1) {
                str[j] = '\0';
                fputs(str, stdout);
                j = 0;
            }
        }

        if (ferror(fp)) {
            perror("file.txt");
            fclose(fp);
            return EXIT_FAILURE;
        }

        fflush(stdout);
        /* forget the end-of-file condition so the next fgetc() reads again */
        clearerr(fp);
        sleep(POLL_SECONDS);
    }
    return 0; /* not reached */
}


回答3:

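You can use select() with fileno(file) as the file descriptor. select() will return either on a timeout (if you set one) or when the descriptor is ready for reading. Note, however, that a descriptor for a regular file is always reported as ready, so for a regular file select() will not block until new data is appended; it is mainly useful for pipes, sockets and terminals.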



回答4:

Using select can be a good choice but if you do not wish to use it, you can add a sleep for a small amount of milliseconds before reading value.



标签: c linux file-io