Tuesday, December 4, 2012

Write and fprintf for file I/O

fprintf() does buffered I/O, whereas write() does unbuffered I/O. So once the write() completes, the data is in the file, whereas for fprintf() it may take a while for the file to be updated to reflect the output. This results in a significant performance difference - the write works at disk speed. The following is a program to test this:

#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>

/* Start-of-interval timestamp: the gethrtime() reading converted to double.
   Written by starttime() and endtime(), read by endtime(). */
static double s_time;

/* Begin a timed interval by storing the current gethrtime() reading. */
void starttime()
{
  const double now = (double)gethrtime();
  s_time = now;
}

/*
 * Print the throughput achieved since the timer was last (re)started, then
 * restart the timer so consecutive measurements can be chained.
 *
 * its: number of one-byte operations performed during the timed interval.
 *
 * NOTE(review): the scaling assumes gethrtime() counts nanoseconds (as on
 * Solaris); bytes per nanosecond * 1000 is then bytes per microsecond,
 * i.e. MB/s. Confirm on the target platform.
 */
void endtime(long its)
{
  double e_time=1.0*gethrtime();
  double elapsed=e_time-s_time;
  double rate=0.0;
  /* A zero (or negative) interval cannot yield a meaningful rate; report 0
     rather than dividing by it. */
  if (elapsed>0.0)
  {
    rate=(1.0*its)/elapsed*1000;
  }
  /* The value printed is a rate, so label it as such rather than as a
     per-iteration time. */
  printf("Throughput %5.2f MB/s\n", rate);
  s_time=1.0*gethrtime();
}

/* Number of one-byte writes performed by each test (10 MiB in total).
   Parenthesized so the expansion stays a single operand in any expression
   (e.g. x / SIZE or x % SIZE). */
#define SIZE (10*1024*1024)

/*
 * Time SIZE one-byte write() calls to ./test.dat and print the throughput.
 *
 * The file is truncated on open and created with rw permissions (subject to
 * the umask), matching what fopen(..., "w") does in the other tests, so all
 * three tests start from the same empty file.
 *
 * On any I/O failure the error is reported with perror() and no throughput
 * is printed, since the measurement would be meaningless.
 */
void test_write()
{
  starttime();
  int file = open("./test.dat",O_WRONLY|O_CREAT|O_TRUNC,
                  S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH);
  if (file < 0)
  {
    perror("open ./test.dat");
    return;
  }
  for (int i=0; i<SIZE; i++)
  {
    /* A one-byte write either transfers the byte or fails. */
    if (write(file,"a",1) != 1)
    {
      perror("write ./test.dat");
      close(file);
      return;
    }
  }
  if (close(file) != 0)
  {
    perror("close ./test.dat");
    return;
  }
  endtime(SIZE);
}

/*
 * Time SIZE one-character fprintf() calls to ./test.dat and print the
 * throughput. The stream is left with its default buffering; fclose()
 * flushes whatever is still buffered before the timer is stopped.
 *
 * On any I/O failure the error is reported with perror() and no throughput
 * is printed.
 */
void test_fprintf()
{
  starttime();
  FILE* file = fopen("./test.dat","w");
  if (file == NULL)
  {
    perror("fopen ./test.dat");
    return;
  }
  for (int i=0; i<SIZE; i++)
  {
    if (fprintf(file,"a") < 0)
    {
      perror("fprintf ./test.dat");
      fclose(file);
      return;
    }
  }
  /* fclose() performs the final flush, so a write error can surface here. */
  if (fclose(file) != 0)
  {
    perror("fclose ./test.dat");
    return;
  }
  endtime(SIZE);
}

/*
 * Time SIZE one-character fprintf() calls to ./test.dat, each followed by
 * fflush(), and print the throughput. Flushing after every character pushes
 * each byte out of the stdio buffer immediately.
 *
 * On any I/O failure the error is reported with perror() and no throughput
 * is printed.
 */
void test_flush()
{
  starttime();
  FILE* file = fopen("./test.dat","w");
  if (file == NULL)
  {
    perror("fopen ./test.dat");
    return;
  }
  for (int i=0; i<SIZE; i++)
  {
    if (fprintf(file,"a") < 0 || fflush(file) != 0)
    {
      perror("fprintf/fflush ./test.dat");
      fclose(file);
      return;
    }
  }
  if (fclose(file) != 0)
  {
    perror("fclose ./test.dat");
    return;
  }
  endtime(SIZE);
}


int main()
{
  test_write();
  test_fprintf();
  test_flush();
}

Compiling and running, I get 0.2MB/s for write() and 6MB/s for fprintf() - a large difference. There are three tests in this example; the third test uses fprintf() and fflush(). This is equivalent to write() both in performance and in functionality. This leads to the suggestion that fprintf() (and other buffered I/O functions) are the fastest way of writing to files, and that fflush() should be used to enforce synchronisation of the file contents.