C Programming: Low-Level Mastery
Home › Insights › Courses › C Programming › Working with Binary Files
File I/O

Binary File I/O

Master binary file I/O for efficient storage of structured data. Learn fread(), fwrite(), binary vs text modes, serialization, endianness, portability, and working with records, arrays, and complex data structures.

Binary vs Text Files

Binary files store data in raw memory format - exact bytes, no conversion. Text files convert data to human-readable characters. Binary is faster, more compact, perfect for structured data. Text is portable, editable, debuggable. Choose based on your needs: binary for performance, text for interoperability.

C
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Text vs binary comparison.
 *
 * Stores the same integer twice: "text.txt" receives its decimal digits
 * followed by a newline, "binary.bin" receives the raw bytes of the int.
 * A file that cannot be opened is skipped without any report.
 */
void compare_text_binary(void) {
    const int sample = 12345;
    FILE *out;

    /* Text form: the characters "12345" plus '\n' */
    out = fopen("text.txt", "w");
    if (out) {
        fprintf(out, "%d\n", sample);
        fclose(out);
    }

    /* Binary form: the object representation, sizeof(int) bytes */
    out = fopen("binary.bin", "wb");
    if (out) {
        fwrite(&sample, sizeof sample, 1, out);
        fclose(out);
    }

    /* The binary file is more compact; the text file is human-readable */
}

/* Binary mode vs text mode */
/*
   Text mode ("r", "w"):
   - Converts newlines (\n <-> \r\n on Windows)
   - May treat Ctrl-Z as EOF
   - Platform-dependent
   
   Binary mode ("rb", "wb"):
   - No conversion
   - Exact byte-for-byte
   - No platform-specific translation of the bytes (the data layout itself can still differ between platforms)
*/

/* When to use binary */
/*
   Use binary when:
   - Storing structured data (structs, arrays)
   - Need speed/efficiency
   - Working with images, audio, video
   - Exact byte representation required
   - Serializing complex data
   
   Use text when:
   - Human readability matters
   - Need interoperability
   - Debugging
   - Configuration files
   - Logs
*/

Writing Binary Data

fwrite() writes binary data directly from memory. Specify pointer, element size, count, and FILE*. Returns elements written. Perfect for structures, arrays, and any data that can be memcpy'd. No formatting overhead like fprintf().

C
/*
 * fwrite() - write binary data
 *
 * Standard library prototype, repeated here for reference (the real
 * declaration comes from <stdio.h>):
 *   ptr    - start of the data to write
 *   size   - size in bytes of one element
 *   nmemb  - number of elements to write
 *   stream - destination stream
 */
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);

/* Returns: Number of elements written (less than nmemb means a write error) */

/*
 * Writing single values.
 *
 * Stores `value` in `filename` as raw bytes, replacing previous contents.
 * Returns without any report if the file cannot be opened; a failed write
 * is reported through perror().
 */
void write_int(const char *filename, int value) {
    FILE *out = fopen(filename, "wb");

    if (out != NULL) {
        size_t items = fwrite(&value, sizeof value, 1, out);
        if (items != 1) {
            perror("Write failed");
        }
        fclose(out);
    }
}

/*
 * Writing structures
 *
 * Employee record used by the structure I/O examples. The examples pass
 * whole Employee objects to fwrite()/fread() with sizeof(Employee), so the
 * in-memory layout of this struct (field order, plus any padding the
 * compiler inserts) is also the on-disk layout.
 */
typedef struct {
    int id;            /* Numeric identifier */
    char name[50];     /* Fixed-size character buffer */
    float salary;      /* Stored in the platform's float representation */
    uint32_t flags;    /* 32-bit flag word; bit meanings are not defined here */
} Employee;

/*
 * Store one Employee record in `filename` as raw bytes, replacing any
 * previous contents. Returns without any report if the file cannot be
 * opened; a failed write is reported through perror().
 */
void write_employee(const char *filename, const Employee *emp) {
    FILE *out = fopen(filename, "wb");

    if (out != NULL) {
        size_t items = fwrite(emp, sizeof *emp, 1, out);
        if (items != 1) {
            perror("Write failed");
        }
        fclose(out);
    }
}

/*
 * Writing arrays.
 *
 * Stores `count` ints from `arr` in `filename` as one contiguous run of
 * raw bytes. Returns without any report if the file cannot be opened; a
 * short write is reported on stderr with the number of elements stored.
 */
void write_array(const char *filename, const int *arr, size_t count) {
    FILE *out = fopen(filename, "wb");
    if (!out) {
        return;
    }

    const size_t stored = fwrite(arr, sizeof *arr, count, out);
    if (stored != count) {
        fprintf(stderr, "Only wrote %zu of %zu elements\n", stored, count);
    }

    fclose(out);
}

/*
 * Writing multiple structures.
 *
 * File layout: a size_t record count followed by `count` raw Employee
 * records. The count prefix has the writing platform's size_t width and
 * byte order, so the file is only readable by a compatible build.
 *
 * Returns without any report if the file cannot be opened. Any failed
 * write, including a failed flush at close, is reported through perror(),
 * matching write_employee().
 */
void write_employees(const char *filename, const Employee *emps, size_t count) {
    FILE *file = fopen(filename, "wb");
    if (file == NULL) {
        return;
    }

    /* Write count first (for reading later) */
    if (fwrite(&count, sizeof count, 1, file) != 1) {
        perror("Write failed");
    } else if (count > 0 &&
               fwrite(emps, sizeof(Employee), count, file) != count) {
        /* count == 0 is skipped so a NULL `emps` is never handed to fwrite */
        perror("Write failed");
    }

    /* fclose flushes buffered data; a failure here means data was lost */
    if (fclose(file) != 0) {
        perror("Write failed");
    }
}

/*
 * Writing with header
 *
 * Header placed at the start of a database file. It is written and read
 * as a whole struct (sizeof(FileHeader) bytes), so any padding the
 * compiler inserts between members is part of the file as well.
 */
typedef struct {
    uint32_t magic;      /* File identifier, compared against MAGIC_NUMBER */
    uint16_t version;    /* Format version, compared against VERSION */
    uint32_t record_count; /* Number of Employee records after the header */
} FileHeader;

#define MAGIC_NUMBER 0x454D5044  /* "EMPD": 0x45 'E', 0x4D 'M', 0x50 'P', 0x44 'D' */
#define VERSION 1                /* Only format version these examples write/accept */

/*
 * Write a database file: a FileHeader followed by `count` raw Employee
 * records.
 *
 * The header stores the record count as uint32_t, so a larger `count` is
 * rejected before the file is touched instead of being silently truncated
 * (which would produce a header that disagrees with the data).
 *
 * Returns without any report if the file cannot be opened. Failed writes,
 * including a failed flush at close, are reported through perror().
 */
void write_database(const char *filename, const Employee *emps, size_t count) {
    if (count > UINT32_MAX) {
        fprintf(stderr, "Too many records for file header: %zu\n", count);
        return;
    }

    FILE *file = fopen(filename, "wb");
    if (file == NULL) {
        return;
    }

    /* Write header */
    FileHeader header = {
        .magic = MAGIC_NUMBER,
        .version = VERSION,
        .record_count = (uint32_t)count
    };

    if (fwrite(&header, sizeof(FileHeader), 1, file) != 1) {
        perror("Write failed");
    } else if (count > 0 &&
               fwrite(emps, sizeof(Employee), count, file) != count) {
        /* count == 0 is skipped so a NULL `emps` is never handed to fwrite */
        perror("Write failed");
    }

    /* fclose flushes buffered data; a failure here means data was lost */
    if (fclose(file) != 0) {
        perror("Write failed");
    }
}

/*
 * Appending binary data.
 *
 * Adds one raw Employee record to the end of `filename` (mode "ab", which
 * creates the file if it does not exist). Returns without any report if
 * the file cannot be opened. A failed write or failed flush at close is
 * reported through perror(), matching write_employee().
 */
void append_employee(const char *filename, const Employee *emp) {
    FILE *file = fopen(filename, "ab");  /* Binary append */
    if (file == NULL) {
        return;
    }

    if (fwrite(emp, sizeof(Employee), 1, file) != 1) {
        perror("Write failed");
    }

    if (fclose(file) != 0) {
        perror("Write failed");
    }
}

Reading Binary Data

fread() reads binary data directly into memory. Specify destination pointer, element size, count, and FILE*. Returns elements read. Always check return value - it may be less than requested at EOF or on error.

C
/*
 * fread() - read binary data
 *
 * Standard library prototype, repeated here for reference (the real
 * declaration comes from <stdio.h>):
 *   ptr    - destination buffer, at least size * nmemb bytes
 *   size   - size in bytes of one element
 *   nmemb  - maximum number of elements to read
 *   stream - source stream
 */
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);

/* Returns: Number of elements read (less than nmemb at end of file or on error) */

/*
 * Reading single values.
 *
 * Loads one raw int from the start of `filename` into `*value`.
 * Returns 0 on success, -1 if the file cannot be opened or does not
 * contain a complete int.
 */
int read_int(const char *filename, int *value) {
    FILE *in = fopen(filename, "rb");
    if (!in) {
        return -1;
    }

    const int status = (fread(value, sizeof *value, 1, in) == 1) ? 0 : -1;

    fclose(in);
    return status;
}

/*
 * Reading structures.
 *
 * Loads one raw Employee record from the start of `filename` into `*emp`.
 * Returns 0 on success and -1 on any failure. A read that fails for a
 * reason other than end of file is reported through perror(); hitting end
 * of file before a full record is read is not reported.
 */
int read_employee(const char *filename, Employee *emp) {
    FILE *in = fopen(filename, "rb");
    if (!in) {
        return -1;
    }

    int status = 0;
    if (fread(emp, sizeof *emp, 1, in) != 1) {
        if (!feof(in)) {
            perror("Read failed");
        }
        status = -1;
    }

    fclose(in);
    return status;
}

/*
 * Reading arrays (known size).
 *
 * Reads up to `count` raw ints from `filename` into `arr`.
 * Returns the number of elements actually read (converted to int), which
 * is smaller than `count` when the file is shorter, or -1 if the file
 * cannot be opened.
 */
int read_array(const char *filename, int *arr, size_t count) {
    FILE *in = fopen(filename, "rb");
    if (!in) {
        return -1;
    }

    const size_t got = fread(arr, sizeof *arr, count, in);
    fclose(in);

    return (int)got;
}

/*
 * Reading arrays (unknown size).
 *
 * Loads the whole of `filename` as an array of raw ints. The element count
 * is derived from the file size, which must be a multiple of sizeof(int).
 *
 * On success returns a malloc'd array (the caller frees it) and stores the
 * element count in `*count`. An empty file yields a valid, freeable
 * pointer with `*count == 0`. On any failure returns NULL and leaves
 * `*count` untouched.
 */
int* read_array_dynamic(const char *filename, size_t *count) {
    FILE *file = fopen(filename, "rb");
    if (file == NULL) {
        return NULL;
    }

    /* Get file size; every positioning call can fail (e.g. on a pipe) */
    long size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        size = ftell(file);
    }
    if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
        fclose(file);
        return NULL;
    }

    const size_t bytes = (size_t)size;
    if (bytes % sizeof(int) != 0) {
        fclose(file);
        return NULL;
    }
    const size_t elems = bytes / sizeof(int);

    /* malloc(0) may legally return NULL; request one byte for an empty
       file so that NULL always means failure */
    int *arr = malloc(bytes > 0 ? bytes : 1);
    if (arr == NULL) {
        fclose(file);
        return NULL;
    }

    /* Read */
    if (elems > 0 && fread(arr, sizeof *arr, elems, file) != elems) {
        free(arr);
        fclose(file);
        return NULL;
    }

    fclose(file);
    *count = elems;
    return arr;
}

/*
 * Reading with header.
 *
 * Loads a database file: validates the FileHeader (magic and version),
 * then reads the `record_count` Employee records that follow it.
 *
 * On success returns a malloc'd array (the caller frees it) and stores the
 * record count in `*count`. A file with zero records yields a valid,
 * freeable pointer with `*count == 0`. On any failure returns NULL and
 * leaves `*count` untouched. Bad magic or version is reported on stderr.
 */
Employee* read_database(const char *filename, size_t *count) {
    FILE *file = fopen(filename, "rb");
    if (file == NULL) {
        return NULL;
    }

    Employee *emps = NULL;
    size_t records = 0;

    /* Read header */
    FileHeader header;
    if (fread(&header, sizeof(FileHeader), 1, file) != 1) {
        goto fail;
    }

    /* Validate */
    if (header.magic != MAGIC_NUMBER) {
        fprintf(stderr, "Invalid file format\n");
        goto fail;
    }

    if (header.version != VERSION) {
        /* Cast: a uint16_t argument promotes to int, but %u expects unsigned */
        fprintf(stderr, "Unsupported version %u\n", (unsigned)header.version);
        goto fail;
    }

    /* record_count comes from the file, so treat it as untrusted: make
       sure the allocation size cannot wrap around */
    records = header.record_count;
    if (records > SIZE_MAX / sizeof(Employee)) {
        goto fail;
    }

    /* malloc(0) may legally return NULL; request one byte for an empty
       database so that NULL always means failure */
    emps = malloc(records > 0 ? records * sizeof(Employee) : 1);
    if (emps == NULL) {
        goto fail;
    }

    /* Read records */
    if (records > 0 &&
        fread(emps, sizeof(Employee), records, file) != records) {
        goto fail;
    }

    *count = records;
    fclose(file);
    return emps;

fail:
    free(emps);
    fclose(file);
    return NULL;
}

/*
 * Reading all records.
 *
 * Reads raw Employee records from `filename` one at a time until a read
 * no longer yields a full record, growing the result buffer by doubling.
 *
 * On success returns a malloc'd array (the caller frees it) and stores the
 * number of records in `*count`. Returns NULL if the file cannot be
 * opened, memory runs out, or the stream reports a read error.
 */
Employee* read_all_employees(const char *filename, size_t *count) {
    FILE *file = fopen(filename, "rb");
    if (!file) {
        return NULL;
    }

    size_t cap = 10;   /* slots currently allocated */
    size_t used = 0;   /* records read so far */
    Employee *list = malloc(cap * sizeof *list);
    if (!list) {
        fclose(file);
        return NULL;
    }

    for (;;) {
        if (fread(list + used, sizeof *list, 1, file) != 1) {
            break;
        }
        used++;

        if (used < cap) {
            continue;
        }

        /* Buffer full: double it so the next read has a free slot */
        cap *= 2;
        Employee *bigger = realloc(list, cap * sizeof *list);
        if (!bigger) {
            free(list);
            fclose(file);
            return NULL;
        }
        list = bigger;
    }

    /* The loop ends on end of file or on error; only the latter fails */
    const int failed = ferror(file);
    fclose(file);
    if (failed) {
        free(list);
        return NULL;
    }

    *count = used;
    return list;
}

Endianness and Portability

Binary files have portability issues: endianness (byte order), padding, pointer sizes. Little-endian stores least significant byte first, big-endian stores most significant first. For portable binary files, define explicit layouts, convert endianness, or use text formats.

C
/*
 * Endianness detection.
 *
 * Returns 1 when the lowest-addressed byte of a 32-bit integer holds its
 * least significant byte (little-endian), otherwise 0.
 */
int is_little_endian(void) {
    const uint32_t probe = 1;
    const uint8_t *first_byte = (const uint8_t *)&probe;

    return first_byte[0] == 1;
}

/*
 * Byte swapping.
 *
 * Returns `value` with its two bytes exchanged.
 */
uint16_t swap_uint16(uint16_t value) {
    const uint16_t low_to_high = (uint16_t)(value << 8);
    const uint16_t high_to_low = (uint16_t)(value >> 8);

    return (uint16_t)(low_to_high | high_to_low);
}

/* Returns `value` with the order of its four bytes reversed. */
uint32_t swap_uint32(uint32_t value) {
    uint32_t reversed = 0;

    /* Peel bytes off the low end and push them onto the result */
    for (int i = 0; i < 4; i++) {
        reversed = (reversed << 8) | (value & 0xFFu);
        value >>= 8;
    }

    return reversed;
}

/* Returns `value` with the order of its eight bytes reversed. */
uint64_t swap_uint64(uint64_t value) {
    uint64_t reversed = 0;

    /* Peel bytes off the low end and push them onto the result */
    for (int i = 0; i < 8; i++) {
        reversed = (reversed << 8) | (value & 0xFFu);
        value >>= 8;
    }

    return reversed;
}

/*
 * Portable integer writing.
 *
 * Emits `value` to `file` as four bytes, most significant byte first
 * (big-endian), regardless of the host byte order. The result of the
 * write is not reported to the caller.
 */
void write_uint32_portable(FILE *file, uint32_t value) {
    uint8_t bytes[4];

    for (int i = 0; i < 4; i++) {
        /* i = 0 takes bits 31..24, i = 3 takes bits 7..0 */
        bytes[i] = (uint8_t)(value >> (24 - 8 * i));
    }

    fwrite(bytes, 1, sizeof bytes, file);
}

/*
 * Reads four bytes from `file` and assembles them as a big-endian
 * 32-bit value, regardless of the host byte order.
 *
 * Returns 0 when fewer than four bytes could be read; that result is
 * indistinguishable from a stored value of 0 by the return value alone.
 */
uint32_t read_uint32_portable(FILE *file) {
    uint8_t bytes[4];
    uint32_t value = 0;

    if (fread(bytes, 1, sizeof bytes, file) != sizeof bytes) {
        return 0;
    }

    /* First byte read is the most significant */
    for (int i = 0; i < 4; i++) {
        value = (value << 8) | bytes[i];
    }

    return value;
}

/*
 * Portable structure format
 *
 * Employee record built only from fixed-size members so it can be
 * serialized one field at a time with an explicit byte order instead of
 * being written as a raw struct.
 */
typedef struct {
    uint32_t id;      /* Fixed size */
    char name[50];    /* Fixed size string */
    uint32_t salary;  /* Fixed size, no float for portability; unit is not specified here */
} PortableEmployee;

/* Write portable */
void write_portable_employee(FILE *file, const PortableEmployee *emp) {
    write_uint32_portable(file, emp-&gt;id);
    fwrite(emp-&gt;name, 1, sizeof(emp-&gt;name), file);
    write_uint32_portable(file, emp-&gt;salary);
}

/*
 * Read portable.
 *
 * Deserializes one PortableEmployee from `file` in the order it is
 * written: big-endian 32-bit `id`, the full `name` buffer, big-endian
 * 32-bit `salary`.
 *
 * Returns 0 on success and -1 if any field could not be read completely.
 * On failure `*emp` may be partially overwritten.
 */
int read_portable_employee(FILE *file, PortableEmployee *emp) {
    emp->id = read_uint32_portable(file);
    /* read_uint32_portable() returns 0 on a short read, which looks like a
       valid value; the stream indicators tell the two cases apart */
    if (feof(file) || ferror(file)) {
        return -1;
    }

    if (fread(emp->name, 1, sizeof(emp->name), file) != sizeof(emp->name)) {
        return -1;
    }

    emp->salary = read_uint32_portable(file);
    if (feof(file) || ferror(file)) {
        return -1;
    }

    return 0;
}

/* Portability issues to avoid */
/*
   1. Pointer storage (size varies)
   2. Padding (compiler-dependent)
   3. Float representation (use integers or text)
   4. Enum sizes (use fixed-size ints)
   5. Bitfields (layout varies)
   6. sizeof(long) varies
*/

/* Portable binary format best practices */
/*
   1. Use fixed-size types (uint32_t, not int)
   2. Define explicit byte order
   3. Pack structures (#pragma pack or attributes)
   4. Avoid pointers in structures
   5. Document format explicitly
   6. Add version numbers
   7. Add magic numbers for validation
   8. Consider using libraries (Protocol Buffers, msgpack)
*/

/*
 * Example portable file format
 *
 * In-memory form of the file header. It is meant to be serialized field
 * by field with an explicit byte order rather than written as a raw
 * struct.
 */
typedef struct {
    uint32_t magic;       /* 'MYFT' */
    uint16_t major_ver;   /* Major format version */
    uint16_t minor_ver;   /* Minor format version */
    uint32_t record_count; /* Number of records (presumably those following the header) */
    uint32_t reserved[4]; /* Future use */
} PortableHeader;

/* Write with endian conversion */
void write_portable_header(FILE *file, const PortableHeader *hdr) {
    write_uint32_portable(file, hdr-&gt;magic);
    write_uint32_portable(file, hdr-&gt;major_ver);
    write_uint32_portable(file, hdr-&gt;minor_ver);
    write_uint32_portable(file, hdr-&gt;record_count);
    for (int i = 0; i < 4; i++) {
        write_uint32_portable(file, hdr-&gt;reserved[i]);
    }
}

Random Access and Record Updates

Binary files enable efficient random access - seek to any record, read or update it. Calculate offsets based on record size and position. Useful for databases, indexes, and direct access patterns. Combine fseek() with fread()/fwrite().

C
/*
 * Random access database
 *
 * Fixed-size record of the random-access examples. Records are stored
 * back to back as raw structs, so record N starts at byte offset
 * N * RECORD_SIZE in the data file.
 */
typedef struct {
    uint32_t id;       /* Key used by the search and index examples */
    char name[50];     /* Fixed-size character buffer */
    float salary;      /* Stored in the platform's float representation */
    uint8_t active;    /* Non-zero = live record; 0 = soft-deleted */
} DBRecord;

/* Size of one stored record, including any compiler-inserted padding */
#define RECORD_SIZE sizeof(DBRecord)

/*
 * Read specific record.
 *
 * Seeks `file` to record number `index` (0-based) and reads it into
 * `*record`.
 *
 * Returns 0 on success, -1 if the byte offset would not fit in the `long`
 * that fseek() takes, if the seek fails, or if a full record cannot be
 * read (for example, `index` is past the end of the file).
 */
int read_record(FILE *file, size_t index, DBRecord *record) {
    /* Reject indexes whose byte offset would overflow long */
    if (index > (size_t)LONG_MAX / RECORD_SIZE) {
        return -1;
    }

    /* Calculate offset */
    long offset = (long)(index * RECORD_SIZE);

    /* Seek to record */
    if (fseek(file, offset, SEEK_SET) != 0) {
        return -1;
    }

    /* Read */
    if (fread(record, RECORD_SIZE, 1, file) != 1) {
        return -1;
    }

    return 0;
}

/*
 * Write specific record.
 *
 * Seeks `file` to record number `index` (0-based) and overwrites that
 * slot with `*record`.
 *
 * Returns 0 on success, -1 if the byte offset would not fit in the `long`
 * that fseek() takes, if the seek fails, or if the write fails. Data may
 * still sit in the stream buffer on return; the caller's fclose()/fflush()
 * is what actually commits it.
 */
int write_record(FILE *file, size_t index, const DBRecord *record) {
    /* Reject indexes whose byte offset would overflow long */
    if (index > (size_t)LONG_MAX / RECORD_SIZE) {
        return -1;
    }

    long offset = (long)(index * RECORD_SIZE);

    if (fseek(file, offset, SEEK_SET) != 0) {
        return -1;
    }

    if (fwrite(record, RECORD_SIZE, 1, file) != 1) {
        return -1;
    }

    return 0;
}

/*
 * Update record.
 *
 * Opens the existing file `filename` for in-place update and overwrites
 * record number `index` with `*record`.
 *
 * Returns 0 on success, -1 if the file cannot be opened, the record
 * cannot be written, or the final flush at close fails.
 */
int update_record(const char *filename, size_t index, const DBRecord *record) {
    FILE *file = fopen(filename, "r+b");  /* Read/write binary */
    if (file == NULL) {
        return -1;
    }

    int result = write_record(file, index, record);

    /* The record may still be buffered; fclose() is where it reaches the
       file, so a failure here means the update was not stored */
    if (fclose(file) != 0) {
        result = -1;
    }

    return result;
}

/*
 * Search for record.
 *
 * Scans `filename` from the first record onward and copies the first
 * record whose id equals `id` into `*result`.
 *
 * Returns the 0-based position of that record (converted to int), or -1
 * if the file cannot be opened or no record matches.
 */
int find_record_by_id(const char *filename, uint32_t id, DBRecord *result) {
    FILE *file = fopen(filename, "rb");
    if (!file) {
        return -1;
    }

    int found_at = -1;  /* Not found */
    DBRecord current;

    for (size_t pos = 0; fread(&current, RECORD_SIZE, 1, file) == 1; pos++) {
        if (current.id != id) {
            continue;
        }
        *result = current;
        found_at = (int)pos;
        break;
    }

    fclose(file);
    return found_at;
}

/*
 * Mark record as deleted (soft delete).
 *
 * Reads record number `index` from `filename`, clears its `active` flag
 * and writes it back to the same slot. The record's bytes stay in the
 * file.
 *
 * Returns 0 on success, -1 if the file cannot be opened or the record
 * cannot be read or written.
 */
int delete_record(const char *filename, size_t index) {
    FILE *file = fopen(filename, "r+b");
    if (!file) {
        return -1;
    }

    int status = -1;
    DBRecord rec;

    if (read_record(file, index, &rec) == 0) {
        /* Mark as inactive */
        rec.active = 0;

        if (write_record(file, index, &rec) == 0) {
            status = 0;
        }
    }

    fclose(file);
    return status;
}

/*
 * Compact file (remove deleted records).
 *
 * Copies every record whose `active` flag is non-zero from `filename`
 * into the scratch file "temp.db" (created in the current directory),
 * then replaces the original with the scratch file.
 *
 * The original is only replaced after the compacted copy has been written
 * and closed without error; on any earlier failure the scratch file is
 * removed and the original is left untouched.
 *
 * Returns the number of records kept (converted to int), or -1 on failure.
 */
int compact_database(const char *filename) {
    FILE *input = fopen(filename, "rb");
    if (input == NULL) {
        return -1;
    }

    FILE *output = fopen("temp.db", "wb");
    if (output == NULL) {
        fclose(input);
        return -1;
    }

    DBRecord record;
    size_t written = 0;
    int ok = 1;

    while (fread(&record, RECORD_SIZE, 1, input) == 1) {
        if (!record.active) {
            continue;
        }
        if (fwrite(&record, RECORD_SIZE, 1, output) != 1) {
            ok = 0;
            break;
        }
        written++;
    }

    /* The loop also stops on a read error, which would drop records */
    if (ferror(input)) {
        ok = 0;
    }

    fclose(input);

    /* fclose flushes buffered records; failure means the copy is incomplete */
    if (fclose(output) != 0) {
        ok = 0;
    }

    if (!ok) {
        remove("temp.db");
        return -1;
    }

    /* Replace original with compacted file. The remove() result is not
       checked on its own: some platforms need the target gone before
       rename(), others replace it anyway, so rename() decides success. */
    remove(filename);
    if (rename("temp.db", filename) != 0) {
        return -1;
    }

    return (int)written;
}

/*
 * Index file for fast lookup
 *
 * One entry of the index file: maps a record id to where that record
 * starts in the data file. Entries are written as raw structs, so the
 * index file depends on this platform's size_t width and byte order.
 */
typedef struct {
    uint32_t id;    /* Record id copied from the data file */
    size_t offset;  /* Byte offset in data file */
} IndexEntry;

/*
 * Build index.
 *
 * Scans `datafile` record by record and writes one IndexEntry
 * (id, byte offset) to `indexfile` for every record whose `active` flag
 * is non-zero. Entries appear in data-file order; they are not sorted by
 * id.
 *
 * Returns 0 on success, -1 if either file cannot be opened, the data file
 * cannot be read, or an index entry cannot be written. After a failure
 * the index file may be incomplete and should not be used.
 */
int build_index(const char *datafile, const char *indexfile) {
    FILE *data = fopen(datafile, "rb");
    if (data == NULL) {
        return -1;
    }

    FILE *index = fopen(indexfile, "wb");
    if (index == NULL) {
        fclose(data);
        return -1;
    }

    DBRecord record;
    size_t offset = 0;
    int status = 0;

    while (fread(&record, RECORD_SIZE, 1, data) == 1) {
        if (record.active) {
            IndexEntry entry = {record.id, offset};
            if (fwrite(&entry, sizeof(IndexEntry), 1, index) != 1) {
                status = -1;
                break;
            }
        }
        /* Inactive records still occupy a slot, so always advance */
        offset += RECORD_SIZE;
    }

    /* The loop also stops on a read error, which would truncate the index */
    if (ferror(data)) {
        status = -1;
    }

    fclose(data);

    /* fclose flushes buffered entries; failure means the index is incomplete */
    if (fclose(index) != 0) {
        status = -1;
    }

    return status;
}

/*
 * Search using index.
 *
 * Looks `id` up in `indexfile`, then reads the record at the stored byte
 * offset from `datafile` into `*result`.
 *
 * The index is scanned linearly from the start. A binary search would
 * need entries sorted by id, which build_index() does not guarantee.
 *
 * Returns 0 on success. Returns -1 if either file cannot be opened, the
 * id is not in the index, the stored offset does not fit in the `long`
 * that fseek() takes, or the record cannot be read. `*result` is only
 * written on success.
 */
int indexed_search(const char *datafile, const char *indexfile,
                  uint32_t id, DBRecord *result) {
    FILE *index = fopen(indexfile, "rb");
    if (index == NULL) {
        return -1;
    }

    IndexEntry entry;
    int found = 0;

    while (fread(&entry, sizeof(IndexEntry), 1, index) == 1) {
        if (entry.id == id) {
            found = 1;
            break;
        }
    }

    fclose(index);

    if (!found) {
        return -1;  /* Not found */
    }

    /* The offset comes from a file; make sure it survives the cast */
    if (entry.offset > (size_t)LONG_MAX) {
        return -1;
    }

    /* Found in index, read from data file */
    FILE *data = fopen(datafile, "rb");
    if (data == NULL) {
        return -1;
    }

    int status = -1;
    DBRecord record;

    /* Read into a local first so a failed read cannot leave a half-filled
       record in the caller's buffer */
    if (fseek(data, (long)entry.offset, SEEK_SET) == 0 &&
        fread(&record, RECORD_SIZE, 1, data) == 1) {
        *result = record;
        status = 0;
    }

    fclose(data);
    return status;
}

Summary & What's Next

Key Takeaways:

  • ✅ Binary files store raw bytes efficiently
  • ✅ fread() and fwrite() for binary I/O
  • ✅ Binary is faster and smaller than text
  • ✅ Use "b" mode ("rb", "wb") for binary files
  • ✅ Endianness affects portability
  • ✅ Use fixed-size types for portable formats
  • ✅ fseek() enables random access
  • ✅ Add headers with magic numbers and versions

What's Next?

Let's learn about error handling and file system operations!