C Programming: Low-Level Mastery
Home › Insights › Courses › C Programming › Random Access & Buffering
File I/O

Advanced File I/O

Master advanced file I/O techniques: buffering strategies, memory-mapped files, file descriptors, unbuffered I/O, stdio alternatives, and performance optimization for high-throughput file operations.

Understanding Buffering

stdio functions use internal buffers to reduce system calls. Full buffering accumulates data until buffer fills. Line buffering writes on newlines. No buffering writes immediately. Understanding buffering helps optimize performance and control when data persists.

C
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Buffer modes:
   _IOFBF - Full buffering (default for files)
   _IOLBF - Line buffering (default for terminals)
   _IONBF - No buffering (default for stderr)
*/

/*
 * Demonstrates an unbuffered stream: opens "output.txt" for writing,
 * switches the stream to unbuffered mode, and writes a single line.
 * Returns silently if the file cannot be opened.
 */
void buffering_example(void) {
    FILE *out = fopen("output.txt", "w");

    if (out != NULL) {
        /* A NULL buffer turns buffering off; this is the short form of
           setvbuf(out, NULL, _IONBF, 0). */
        setbuf(out, NULL);

        /* With no stdio buffer, the text is passed on at once */
        fprintf(out, "Immediate write\n");

        fclose(out);
    }
}

/*
 * Demonstrates full buffering with a caller-supplied buffer: 1000 lines
 * are written to "output.txt" through an 8 KB stack buffer.
 * Returns silently if the file cannot be opened.
 */
void custom_buffer_example(void) {
    /* Backing store for the stream; it is a local array, so the stream
       is closed before this function returns */
    char stream_buf[8192];
    FILE *out = fopen("output.txt", "w");

    if (out == NULL) {
        return;
    }

    /* Install the buffer before any I/O is done on the stream */
    setvbuf(out, stream_buf, _IOFBF, sizeof stream_buf);

    /* Output collects in stream_buf and goes out whenever it fills */
    int line = 0;
    while (line < 1000) {
        fprintf(out, "Line %d\n", line);
        line++;
    }

    /* Push out whatever is still pending (fclose would do this too) */
    fflush(out);

    fclose(out);
}

/*
 * Demonstrates line buffering on "log.txt": output is held back until a
 * newline is written. Returns silently if the file cannot be opened.
 */
void line_buffering_example(void) {
    FILE *logfile = fopen("log.txt", "w");

    if (logfile != NULL) {
        /* Let stdio allocate the buffer; flush at each '\n' */
        setvbuf(logfile, NULL, _IOLBF, 0);

        fprintf(logfile, "This writes immediately\n");  /* Ends in '\n': written */
        fprintf(logfile, "This waits...");               /* No '\n' yet: held back */
        fprintf(logfile, "...until newline\n");          /* '\n' releases both */

        fclose(logfile);
    }
}

/*
 * Buffering performance comparison.
 *
 * Writes 10000 single characters to "test1.txt" through an unbuffered
 * stream, then the same data to "test2.txt" with the default buffering.
 * A file that cannot be opened is skipped instead of being written
 * through a NULL stream.
 */
void compare_buffering(void) {
    FILE *file;

    /* Unbuffered (slow) */
    file = fopen("test1.txt", "w");
    if (file != NULL) {
        setvbuf(file, NULL, _IONBF, 0);

        for (int i = 0; i < 10000; i++) {
            fputc('X', file);  /* 10000 system calls */
        }

        fclose(file);
    }

    /* Buffered (fast) */
    file = fopen("test2.txt", "w");
    if (file != NULL) {
        /* Default buffering is left in place */

        for (int i = 0; i < 10000; i++) {
            fputc('X', file);  /* Few system calls */
        }

        fclose(file);
    }
}

/*
 * Demonstrates an explicit flush: the first line written to "data.txt"
 * is flushed right away, the second is left to fclose().
 * Returns silently if the file cannot be opened.
 */
void manual_flush_example(void) {
    FILE *out = fopen("data.txt", "w");

    if (out != NULL) {
        fprintf(out, "Important data\n");

        /* Empty the stdio buffer now, so this line no longer depends on
           the program reaching fclose() */
        fflush(out);

        fprintf(out, "More data\n");

        /* Closing the stream flushes the remaining line */
        fclose(out);
    }
}

/*
 * Report how stdout is buffered.
 *
 * Standard C offers no way to query a stream's buffer mode. The check
 * below reads private FILE flags of glibc, which only some C library
 * versions expose; everywhere else the mode is reported as unknown.
 * Typical buffer size: 4096 or 8192 bytes.
 *
 * file: unused; the report always describes stdout.
 */
void check_buffer_info(FILE *file) {
    (void)file;  /* Kept so existing callers still compile */

    printf("Buffer mode for stdout: ");
#if defined(__GLIBC__) && defined(_IO_UNBUFFERED) && defined(_IO_LINE_BUF)
    if (stdout->_flags & _IO_UNBUFFERED) {
        printf("Unbuffered\n");
    } else if (stdout->_flags & _IO_LINE_BUF) {
        printf("Line buffered\n");
    } else {
        printf("Fully buffered\n");
    }
#else
    /* The flag macros are not available, so the mode cannot be read */
    printf("Unknown (not exposed by this C library)\n");
#endif
}

File Descriptors and Low-Level I/O

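FILE* is the high-level, buffered interface. File descriptors are low-level integers passed directly to system calls. Use open(), read(), write(), and close() for unbuffered I/O. fileno() returns the descriptor behind a FILE*, and fdopen() wraps a descriptor in a FILE*. Mix the two with care: stdio may still hold data in its buffer, so call fflush() before writing through the descriptor, or the output can end up out of order.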

C
#include <fcntl.h>
#include <unistd.h>

/* Low-level file operations */

/*
 * Opens "file.txt" read-only through the descriptor API and closes it
 * again.
 *
 * Returns 0 on success, or -1 after reporting the error with perror()
 * when the file cannot be opened.
 */
int low_level_open_example(void) {
    int descriptor = open("file.txt", O_RDONLY);

    /* open() yields a descriptor, or -1 on failure */
    if (descriptor != -1) {
        /* The descriptor would be used here... */

        close(descriptor);
        return 0;
    }

    perror("open");
    return -1;
}

/* Open flags */
/*
   O_RDONLY  - Read only
   O_WRONLY  - Write only
   O_RDWR    - Read/write
   O_CREAT   - Create if doesn't exist
   O_TRUNC   - Truncate to zero
   O_APPEND  - Append mode
   O_EXCL    - Fail if exists (with O_CREAT)
   O_SYNC    - Synchronous writes
   O_NONBLOCK - Non-blocking I/O
*/

/*
 * Walks through three common open() flag combinations: create-or-truncate
 * ("new.txt"), append ("log.txt"), and exclusive create ("unique.txt").
 * Each descriptor that was obtained is closed immediately.
 */
void open_flags_examples(void) {
    /* O_CREAT | O_TRUNC: create the file, or empty an existing one */
    int truncated = open("new.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (truncated != -1) {
        close(truncated);
    }

    /* O_APPEND: open for appending, creating the file if needed */
    int appended = open("log.txt", O_WRONLY | O_CREAT | O_APPEND, 0644);
    if (appended != -1) {
        close(appended);
    }

    /* O_CREAT | O_EXCL: refuse to open a file that already exists */
    int exclusive = open("unique.txt", O_WRONLY | O_CREAT | O_EXCL, 0644);
    if (exclusive != -1) {
        close(exclusive);
    } else if (errno == EEXIST) {
        printf("File already exists\n");
    }
}

/*
 * Reads up to 1024 bytes from "data.txt" with a single read() call and
 * reports the outcome: an error, end of file, or the byte count.
 * Returns silently if the file cannot be opened.
 */
void low_level_read_example(void) {
    char chunk[1024];
    int descriptor = open("data.txt", O_RDONLY);

    if (descriptor == -1) {
        return;
    }

    ssize_t got = read(descriptor, chunk, sizeof chunk);

    if (got == 0) {
        /* A zero return means there was nothing left to read */
        printf("EOF\n");
    } else if (got != -1) {
        printf("Read %zd bytes\n", got);
    } else {
        perror("read");
    }

    close(descriptor);
}

/*
 * Writes "Hello, World!\n" to "output.txt" through a file descriptor.
 *
 * write() may transfer fewer bytes than requested or be interrupted by a
 * signal, so the call is repeated until the whole message is out or a
 * real error occurs. Prints the number of bytes written, or reports the
 * error with perror(). Returns silently if the file cannot be opened.
 */
void low_level_write_example(void) {
    int fd = open("output.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        return;
    }

    const char *message = "Hello, World!\n";
    size_t length = strlen(message);
    size_t done = 0;
    int failed = 0;

    while (done < length) {
        ssize_t n = write(fd, message + done, length - done);

        if (n == -1) {
            if (errno == EINTR) {
                continue;  /* Interrupted before any byte was written: retry */
            }
            perror("write");
            failed = 1;
            break;
        }
        if (n == 0) {
            break;  /* No progress; stop rather than loop forever */
        }
        done += (size_t)n;  /* Short write: continue with the remainder */
    }

    if (!failed) {
        printf("Wrote %zd bytes\n", (ssize_t)done);
    }

    close(fd);
}

/*
 * Shows how to obtain the descriptor behind a stdio stream: opens
 * "test.txt" for reading and prints the descriptor number.
 * Returns silently if the file cannot be opened.
 */
void fileno_example(void) {
    FILE *stream = fopen("test.txt", "r");

    if (stream != NULL) {
        /* The descriptor may be used for low-level calls, but those
           bypass the stream's buffer, so mix the two with care */
        int descriptor = fileno(stream);
        printf("File descriptor: %d\n", descriptor);

        /* fclose() releases the underlying descriptor as well */
        fclose(stream);
    }
}

/*
 * Shows how to wrap an existing descriptor in a stdio stream: opens
 * "data.txt" with open(), converts it with fdopen(), and reads one line.
 * Returns silently if either step fails.
 */
void fdopen_example(void) {
    char line[100];
    int descriptor = open("data.txt", O_RDONLY);

    if (descriptor == -1) {
        return;
    }

    FILE *stream = fdopen(descriptor, "r");

    if (stream != NULL) {
        /* stdio functions now work on the descriptor */
        fgets(line, sizeof line, stream);

        /* Closing the stream closes the descriptor too */
        fclose(stream);
    } else {
        /* No stream was created, so close the descriptor directly */
        close(descriptor);
    }
}

/*
 * Illustrates the ordering hazard of mixing stdio with direct descriptor
 * writes on the same file ("test.txt", opened "r+"). The first write()
 * is issued while earlier stdio output is still buffered; the second is
 * issued after fflush().
 * Returns silently if the file cannot be opened.
 */
void mixing_io_caveat(void) {
    FILE *stream = fopen("test.txt", "r+");

    if (stream != NULL) {
        /* This text goes into the stdio buffer */
        fprintf(stream, "Buffered\n");

        /* Descriptor that sits underneath the stream */
        int descriptor = fileno(stream);

        /* WRONG: "Buffered\n" has not been flushed yet, so this direct
           write can land in the file ahead of it */
        write(descriptor, "Direct\n", 7);

        /* RIGHT: flush the stdio buffer first, then write directly */
        fflush(stream);
        write(descriptor, "Direct\n", 7);

        fclose(stream);
    }
}

Memory-Mapped Files

Memory-mapped I/O maps files directly into process address space. Access file contents like regular memory - no read()/write() calls needed. OS handles paging automatically. Excellent for large files and random access. Use mmap() on Unix, CreateFileMapping() on Windows.

C
#ifdef __unix__
#include <sys/mman.h>

/*
 * Memory-mapped file example: maps "large_file.dat" read-only and walks
 * over its bytes.
 *
 * Failures of open(), lseek() and mmap() are reported with perror().
 * An empty file is skipped, because a zero-length mapping is an error.
 */
void mmap_example(void) {
    int fd = open("large_file.dat", O_RDONLY);
    if (fd == -1) {
        perror("open");
        return;
    }

    /* Get file size */
    off_t size = lseek(fd, 0, SEEK_END);
    if (size == -1) {
        perror("lseek");
        close(fd);
        return;
    }
    if (size == 0) {
        /* Nothing to map */
        close(fd);
        return;
    }

    size_t length = (size_t)size;

    /* Map file into memory */
    void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);

    if (addr == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return;
    }

    /* Can close fd now; the mapping stays valid */
    close(fd);

    /* Access file contents like array */
    const unsigned char *data = addr;

    /* Read byte at position 1000, but only if the file is that long */
    if (length > 1000) {
        unsigned char byte = data[1000];
        (void)byte;  /* Placeholder: a real program would use the value */
    }

    /* Process data... */
    for (size_t i = 0; i < length; i++) {
        /* Access data[i] directly */
    }

    /* Unmap when done */
    munmap(addr, length);
}

/*
 * Fills "output.dat" with a repeating 0..255 byte pattern through a
 * shared, writable mapping of 1 MB, then syncs and unmaps it.
 * ftruncate() and mmap() failures are reported with perror(); a failed
 * open() returns silently.
 */
void mmap_write_example(void) {
    const size_t length = 1024 * 1024;  /* 1 MB */
    int descriptor = open("output.dat", O_RDWR | O_CREAT, 0644);

    if (descriptor == -1) {
        return;
    }

    /* Give the file the size of the region about to be mapped */
    if (ftruncate(descriptor, length) == -1) {
        perror("ftruncate");
        close(descriptor);
        return;
    }

    /* Shared mapping: stores go to the file itself */
    void *mapping = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
                         descriptor, 0);

    if (mapping == MAP_FAILED) {
        perror("mmap");
        close(descriptor);
        return;
    }

    /* The descriptor is not needed once the mapping exists */
    close(descriptor);

    /* Walk the region and store the low byte of each offset */
    unsigned char *cursor = mapping;
    for (size_t offset = 0; offset < length; offset++) {
        *cursor++ = (unsigned char)(offset % 256);
    }

    /* Wait until the changes have been written out */
    msync(mapping, length, MS_SYNC);

    munmap(mapping, length);
}

/* mmap advantages */
/*
   - Fast random access
   - No explicit read/write calls
   - OS handles paging
   - Multiple processes can share
   - Lazy loading (only accessed pages loaded)
*/

/* mmap disadvantages */
/*
   - Platform-specific
   - Address space limitations
   - File offsets must be page-aligned; mappings are made in whole pages
   - Error handling complex
   - Not suitable for sequential I/O
*/

/*
 * Count how many bytes of a file are equal to `target`, using a
 * read-only memory mapping.
 *
 * filename: path of the file to scan.
 * target:   byte value to look for.
 *
 * Returns the number of matching bytes. Returns 0 when the file cannot
 * be opened, sized or mapped, and for an empty file.
 */
uint64_t count_bytes_mmap(const char *filename, unsigned char target) {
    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        return 0;
    }

    off_t size = lseek(fd, 0, SEEK_END);
    if (size <= 0) {
        /* lseek() failed, or the file is empty. A zero-length mmap() is
           an error, and -1 must not be passed on as a length. */
        close(fd);
        return 0;
    }

    size_t length = (size_t)size;
    void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);  /* The mapping does not need the descriptor */

    if (addr == MAP_FAILED) {
        return 0;
    }

    const unsigned char *data = addr;
    uint64_t count = 0;

    for (size_t i = 0; i < length; i++) {
        if (data[i] == target) {
            count++;
        }
    }

    munmap(addr, length);
    return count;
}

/*
 * Create an anonymous, shared, readable and writable mapping of `size`
 * bytes. No file is involved: the descriptor argument is -1.
 *
 * Returns whatever mmap() returns, so a failure is signalled by
 * MAP_FAILED rather than NULL.
 */
void* create_shared_memory(size_t size) {
    const int protection = PROT_READ | PROT_WRITE;
    const int visibility = MAP_SHARED | MAP_ANONYMOUS;
    void *region = mmap(NULL, size, protection, visibility, -1, 0);

    return region;
}
#endif

Performance Optimization

File I/O performance depends on buffer sizes, access patterns, and system calls. Large buffers reduce overhead. Sequential access is faster than random. Batch operations minimize calls. Understanding these principles helps optimize throughput.

C
/* Optimal buffer size */
#define OPTIMAL_BUFFER_SIZE 65536  /* 64 KB */

/*
 * Copy the file `src` to `dest` in 64 KB chunks.
 *
 * src:  path of the file to read.
 * dest: path of the file to create or overwrite.
 *
 * Returns 0 when every byte was copied and the destination was closed
 * without error, -1 on any failure (open, allocation, read, write or
 * close). When the source cannot be opened the destination is not
 * touched.
 */
int fast_copy(const char *src, const char *dest) {
    int result = -1;
    FILE *out = NULL;
    char *buffer = NULL;
    size_t bytes;

    /* Open the source first so that a bad source path does not
       truncate an existing destination */
    FILE *in = fopen(src, "rb");
    if (in == NULL) {
        return -1;
    }

    out = fopen(dest, "wb");
    if (out == NULL) {
        goto cleanup;
    }

    /* Large buffer */
    buffer = malloc(OPTIMAL_BUFFER_SIZE);
    if (buffer == NULL) {
        goto cleanup;
    }

    while ((bytes = fread(buffer, 1, OPTIMAL_BUFFER_SIZE, in)) > 0) {
        if (fwrite(buffer, 1, bytes, out) != bytes) {
            goto cleanup;  /* Short write: the copy is incomplete */
        }
    }

    /* fread() returns 0 for both end of file and error; tell them apart */
    if (!ferror(in)) {
        result = 0;
    }

cleanup:
    free(buffer);
    /* fclose() flushes the last buffered data, so its failure counts */
    if (out != NULL && fclose(out) != 0) {
        result = -1;
    }
    fclose(in);
    return result;
}

/*
 * Sequential vs random access.
 *
 * Reads "large_file.dat" once from start to end in 4096-byte chunks,
 * then performs 1000 reads at random offsets below 1000000.
 * Returns silently if the file cannot be opened.
 */
void access_pattern_comparison(void) {
    FILE *file = fopen("large_file.dat", "rb");
    if (file == NULL) {
        return;
    }

    char buffer[4096];

    /* Sequential (fast) */
    while (fread(buffer, 1, sizeof(buffer), file) > 0) {
        /* Process buffer */
    }

    /* Random (slow) */
    for (int i = 0; i < 1000; i++) {
        if (fseek(file, rand() % 1000000, SEEK_SET) != 0) {
            continue;  /* Could not reposition: skip this probe */
        }

        size_t got = fread(buffer, 1, sizeof(buffer), file);
        (void)got;  /* May be short, or 0 at or past the end of the file */
    }

    fclose(file);
}

/*
 * Contrasts two ways of writing the numbers 0..9999 to "output.txt":
 * one stdio call per number, and one fwrite() of a pre-formatted block.
 * Both variants run, so the file receives the sequence twice.
 * Returns silently if the file cannot be opened.
 */
void batch_writes_example(void) {
    FILE *out = fopen("output.txt", "w");

    if (out == NULL) {
        return;
    }

    /* BAD: one formatted stdio call per number */
    for (int n = 0; n < 10000; n++) {
        fprintf(out, "%d\n", n);  /* 10000 buffer operations */
    }

    /* BETTER: format everything into memory, then hand it over at once.
       Each entry is at most 5 characters ("9999\n"), so 10000 entries
       fit in the 100000-byte array. */
    char staging[100000];
    size_t used = 0;

    for (int n = 0; n < 10000; n++) {
        used += (size_t)sprintf(staging + used, "%d\n", n);
    }

    fwrite(staging, 1, used, out);  /* Single write */

    fclose(out);
}

/*
 * Contrasts flushing after every line with letting stdio buffer the
 * output: the numbers 0..999 are written to "data.txt" once each way.
 * Returns silently if the file cannot be opened.
 */
void minimize_flushes(void) {
    FILE *out = fopen("data.txt", "w");

    if (out != NULL) {
        int value;

        /* BAD: every line is forced out on its own */
        for (value = 0; value < 1000; value++) {
            fprintf(out, "%d\n", value);
            fflush(out);  /* Slow! */
        }

        /* GOOD: the lines collect in the stream's buffer */
        value = 0;
        while (value < 1000) {
            fprintf(out, "%d\n", value);
            value++;
        }

        /* fclose() flushes what is still buffered */
        fclose(out);
    }
}

/*
 * Opens "data.dat" and, on Linux, hints to the kernel that the file
 * will be read sequentially before it is processed.
 * Returns silently if the file cannot be opened.
 */
void prefetch_optimization(void) {
    FILE *stream = fopen("data.dat", "rb");

    if (stream != NULL) {
#ifdef __linux__
        /* Sequential-access hint, given for offset 0 and length 0 */
        int descriptor = fileno(stream);
        posix_fadvise(descriptor, 0, 0, POSIX_FADV_SEQUENTIAL);
#endif

        /* Process file... */

        fclose(stream);
    }
}

/* Direct I/O (bypass cache) */
#ifdef __linux__
/*
 * Reads the first 4096 bytes of "database.dat" with O_DIRECT into a
 * 4096-byte-aligned buffer.
 *
 * Returns silently if the file cannot be opened or the aligned buffer
 * cannot be allocated; a failed read is reported with perror().
 * NOTE: on glibc, O_DIRECT is only declared when _GNU_SOURCE is defined
 * before the first #include.
 */
void direct_io_example(void) {
    enum { DIRECT_IO_BLOCK = 4096 };  /* Buffer alignment and read size */

    /* For database-like applications */
    int fd = open("database.dat", O_RDWR | O_DIRECT);

    if (fd == -1) {
        return;
    }

    /* Must use aligned buffers */
    void *buffer = NULL;
    if (posix_memalign(&buffer, DIRECT_IO_BLOCK, DIRECT_IO_BLOCK) != 0) {
        /* Allocation failed, so there is no buffer to read into or free */
        close(fd);
        return;
    }

    if (read(fd, buffer, DIRECT_IO_BLOCK) == -1) {
        perror("read");
    }

    free(buffer);
    close(fd);
}
#endif

/* Async I/O (advanced) */
#ifdef __unix__
#include <aio.h>

/*
 * Starts an asynchronous read of the first 4096 bytes of "file.dat",
 * waits for it to finish, and collects the result.
 *
 * A request that cannot be queued, or that completes with an error, is
 * reported with perror(). Returns silently if the file cannot be opened.
 */
void async_io_example(void) {
    int fd = open("file.dat", O_RDONLY);
    if (fd == -1) {
        return;
    }

    struct aiocb cb;
    char buffer[4096];

    memset(&cb, 0, sizeof(cb));
    cb.aio_fildes = fd;
    cb.aio_buf = buffer;
    cb.aio_nbytes = sizeof(buffer);
    cb.aio_offset = 0;

    /* Start async read */
    if (aio_read(&cb) == -1) {
        /* Nothing was queued, so there is nothing to wait for */
        perror("aio_read");
        close(fd);
        return;
    }

    /* Do other work while I/O happens */

    /* Wait for completion. aio_suspend() sleeps until the request is
       done instead of spinning; if it returns early (for example on a
       signal) the loop simply checks the status again. */
    const struct aiocb *const pending[] = { &cb };
    while (aio_error(&cb) == EINPROGRESS) {
        aio_suspend(pending, 1, NULL);
    }

    /* Fetch the final status, then release the request with aio_return() */
    int status = aio_error(&cb);
    ssize_t bytes = aio_return(&cb);

    if (status != 0) {
        errno = status;
        perror("aio_read");
    }
    (void)bytes;  /* Number of bytes read; unused in this example */

    close(fd);
}
#endif

Summary & What's Next

Key Takeaways:

  • ✅ stdio uses buffering to reduce system calls
  • ✅ Control buffering with setvbuf() and fflush()
  • ✅ File descriptors provide low-level unbuffered I/O
  • ✅ mmap() enables memory-mapped file access
  • ✅ Large buffers improve performance
  • ✅ Sequential access is faster than random
  • ✅ Batch operations to minimize overhead
  • ✅ Choose I/O method based on access pattern

What's Next?

Let's learn about the C preprocessor and macros!