Strings Basics
Master C strings - null-terminated character arrays. Learn declaration, initialization, the null terminator, string literals, and fundamental differences from other languages' string types.
Understanding C Strings
In C, strings are arrays of characters terminated by a null character (\0). Unlike higher-level languages with string types, C has no built-in string type - just arrays of char with a convention. The null terminator marks the string's end, allowing functions to find where the string stops. This simplicity gives performance but requires careful handling to avoid buffer overflows.
#include <stdio.h>
/*
 * Builds the string "Hello" in four different ways and prints each one
 * with printf's %s conversion.
 */
int main(void) {
    /* String as character array: the terminator is written by hand */
    char str1[6] = {'H', 'e', 'l', 'l', 'o', '\0'};

    /* String literal initializer (easier): \0 is appended automatically */
    char str2[] = "Hello";

    /* Explicit size: the elements after "Hello" are all set to \0 */
    char str3[20] = "Hello";

    /* Pointer to string literal: const, because the literal must never be
       written through this pointer */
    const char *str4 = "Hello";

    printf("%s\n", str1); // Hello
    printf("%s\n", str2); // Hello
    printf("%s\n", str3); // Hello
    printf("%s\n", str4); // Hello
    return 0;
}
/* Memory layout of "Hello":
   [H] [e] [l] [l] [o] [\0]
    0   1   2   3   4   5
   Length: 5 characters
   Size:   6 bytes (including \0)
*/
String Declaration and Initialization
Strings can be declared as arrays or pointers. Arrays allocate storage and are modifiable; pointers to string literals point to read-only memory. Understanding the difference prevents crashes and undefined behavior.
/* Snippet: the different ways to declare a string, and what each one
   means for writability and for sizeof/strlen. */
/* Method 1: Character array (modifiable) */
char str1[20]; // Uninitialized (indeterminate contents when declared inside a function)
char str2[20] = "Hello"; // Initialized, rest is \0
char str3[] = "Hello"; // Size inferred (6 bytes)
/* Method 2: Pointer to string literal (read-only) */
char *str4 = "Hello"; // Points to string literal
const char *str5 = "Hello"; // Better: explicitly const
/* Method 3: Explicit character initialization */
char str6[] = {'H', 'e', 'l', 'l', 'o', '\0'}; // Manual
/* Difference between array and pointer */
char arr[] = "Hello"; // Array: modifiable copy of the literal
char *ptr = "Hello"; // Pointer: refers to the literal itself (read-only)
arr[0] = 'h'; // OK: array is writable
// ptr[0] = 'h'; // UNDEFINED BEHAVIOR (usually a crash): string literal is read-only
printf("%s\n", arr); // hello
printf("%s\n", ptr); // Hello
/* Size calculation */
char s1[] = "Hello";
printf("Length: %zu\n", strlen(s1)); // 5 (without \0)
printf("Size: %zu\n", sizeof(s1)); // 6 (with \0)
char *s2 = "Hello";
printf("Length: %zu\n", strlen(s2)); // 5
printf("Size: %zu\n", sizeof(s2)); // sizeof(char *), NOT the string: 8 on typical 64-bit targets
/* Empty string */
char empty1[] = ""; // Size is 1 (just \0)
char empty2[1] = {0}; // Same
char *empty3 = ""; // Points to \0
/* Multiple strings */
char *days[] = {
"Monday",
"Tuesday",
"Wednesday"
};
int num_days = sizeof(days) / sizeof(days[0]);
The Null Terminator
The null terminator ('\0') is essential - it's how C knows where strings end. Without it, string functions read past the end of the array, causing crashes or security vulnerabilities. Always ensure strings are null-terminated.
/* Proper null termination */
char str1[6] = "Hello"; // Automatically terminated (6 = 5 chars + \0)
char str2[6] = {'H','e','l','l','o','\0'}; // Manually terminated
/* Missing null terminator (DANGEROUS) */
char bad[5] = {'H','e','l','l','o'}; // No \0! A valid char array, but not a C string
// printf("%s\n", bad); // UNDEFINED: Reads past array
/* Common mistake: Not leaving room for \0 */
char too_small[5];
// strcpy(too_small, "Hello"); // BUFFER OVERFLOW!
// "Hello" needs 6 bytes (5 + \0)
/* Correct size */
char correct[6];
strcpy(correct, "Hello"); // OK: 6 bytes available
/* Manual termination */
char buffer[10];
buffer[0] = 'H';
buffer[1] = 'i';
buffer[2] = '\0'; // Must add \0: the other elements of buffer are uninitialized
printf("%s\n", buffer); // Hi
/* Strings can contain \0 in middle (but functions stop there) */
char with_null[] = "Hello\0World";
printf("%s\n", with_null); // Only prints "Hello"
printf("Size: %zu\n", sizeof(with_null)); // 12 (entire array: 5 + \0 + 5 + implicit \0)
printf("Length: %zu\n", strlen(with_null)); // 5 (stops at first \0)
/* Verifying null termination */
/*
 * Reports whether a '\0' occurs within the first max_len bytes of str.
 *
 * str:     buffer to inspect; a null pointer is treated as "not terminated"
 * max_len: number of bytes that may safely be read from str
 *
 * Returns 1 if a terminator is found within max_len bytes, 0 otherwise
 * (including when str is null or max_len is 0).
 */
int is_null_terminated(const char *str, size_t max_len) {
    if (!str) {
        return 0; // Nothing to scan; never dereference a null pointer
    }
    for (size_t i = 0; i < max_len; i++) {
        if (str[i] == '\0') {
            return 1; // Found null terminator
        }
    }
    return 0; // Not terminated within max_len
}
/* Safe string construction: track the length while appending, then
   write the terminator at that position */
char safe[20];
size_t len = 0; // Index of the next free element
safe[len++] = 'H';
safe[len++] = 'i';
safe[len] = '\0'; // Always terminate
printf("%s\n", safe); // Hi
Reading and Writing Strings
C provides several functions for string I/O. Understanding their behavior and limitations prevents buffer overflows and crashes. Always use safe versions like fgets and snprintf.
#define _POSIX_C_SOURCE 200809L /* exposes getline(); must precede the first #include */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Reading strings */
/*
 * Interactive tour of three ways to read text from stdin: a width-limited
 * scanf, fgets, and POSIX getline. The result of every read is checked,
 * because after a failed read the buffer may not hold the expected input.
 */
void string_input_examples(void) {
    char name[50];

    /* Method 1: scanf (UNSAFE - no length limit) */
    printf("Enter name: ");
    // scanf("%s", name); // DANGEROUS: Can overflow

    /* Better: Limit input length */
    if (scanf("%49s", name) != 1) { // Read max 49 chars (+ \0)
        return; // EOF or error: name was never filled in
    }
    /* scanf stops at whitespace */
    // Input: "John Doe" -> name contains "John"

    /* Discard the rest of that line (e.g. " Doe\n"); otherwise the fgets
       below would consume the leftovers instead of waiting for new input */
    int ch;
    while ((ch = getchar()) != '\n' && ch != EOF) {
        /* skip */
    }

    /* Method 2: fgets (SAFER) */
    printf("Enter full name: ");
    if (fgets(name, sizeof(name), stdin) == NULL) {
        return; // EOF or error
    }
    /* fgets includes newline - remove it */
    name[strcspn(name, "\n")] = '\0';

    /* Method 3: getline (POSIX, allocates memory) */
    char *line = NULL;
    size_t len = 0;
    printf("Enter text: ");
    if (getline(&line, &len, stdin) != -1) { // Allocates as needed
        printf("You entered: %s", line);
    }
    free(line); // Needed even when getline fails; free(NULL) is a no-op
}
/* Writing strings */
/*
 * Prints one fixed greeting through printf, puts and fputs, then shows how
 * the %s precision and field-width modifiers trim and pad the output.
 */
void string_output_examples(void)
{
    char greeting[] = "Hello, World!";

    /* printf: the newline comes from the format string */
    printf("%s\n", greeting);

    /* puts: appends the newline itself */
    puts(greeting);

    /* fputs: writes the string only, so the newline is printed separately */
    fputs(greeting, stdout);
    printf("\n");

    /* %s modifiers: a precision truncates, a field width pads */
    printf("Message: '%s'\n", greeting);
    printf("First 5 chars: '%.5s'\n", greeting);  // 'Hello'
    printf("Right-aligned: '%20s'\n", greeting);  // '       Hello, World!'
    printf("Left-aligned: '%-20s'\n", greeting);  // 'Hello, World!       '
}
/* Safe input with length checking */
/*
 * Reads one line from stdin into buffer and strips the trailing newline.
 *
 * buffer: destination array; must not be null
 * size:   capacity of buffer in bytes, including room for the terminator
 *
 * Returns 0 on success. Returns -1 when buffer is null, size is 0, or
 * fgets reports EOF / a read error; in that case the contents of buffer
 * must not be relied on.
 *
 * A line longer than size - 1 characters is truncated; the remainder is
 * left in stdin for the next read.
 */
int read_string_safe(char *buffer, size_t size) {
    if (buffer == NULL || size == 0) {
        return -1; // No room for even the terminator
    }
    /* fgets takes an int count; clamp so a huge size_t cannot be narrowed
       to a negative or wrong value */
    int limit = size > (size_t)INT_MAX ? INT_MAX : (int)size;
    if (fgets(buffer, limit, stdin) == NULL) {
        return -1; // Error or EOF
    }
    /* Remove newline. When no '\n' is present strcspn returns the string
       length, so this just rewrites the existing terminator. */
    buffer[strcspn(buffer, "\n")] = '\0';
    return 0; // Success
}
/* Usage */
/*
 * Prompts for a name and greets the user. Nothing more is printed when
 * read_string_safe reports a failure.
 */
void safe_input_example(void)
{
    char who[50];

    printf("Enter your name: ");
    if (read_string_safe(who, sizeof(who)) != 0) {
        return; // No line could be read
    }
    printf("Hello, %s!\n", who);
}
String Literals and Memory
String literals have static storage duration and are typically placed in read-only memory. Attempting to modify one is undefined behavior, which usually shows up as a crash. Use char arrays when you need modifiable strings. Understanding how string literals behave prevents mysterious crashes.
/* String literals are read-only */
char *str1 = "Hello"; // Points to string literal
// str1[0] = 'h'; // UNDEFINED BEHAVIOR (usually a crash): modifying a string literal
/* Better: Use const */
const char *str2 = "Hello"; // Explicitly const
// str2[0] = 'h'; // Compiler error (good!)
/* Modifiable copy */
char str3[] = "Hello"; // Array copy (writable)
str3[0] = 'h'; // OK: Modifying array
printf("%s\n", str3); // hello
/* String literal lifetime */
/*
 * Returns a pointer to the literal "Hello". The pointer stays valid after
 * the call because string literals have static storage duration; the
 * caller must not write through it.
 */
char *get_message(void)
{
    char *greeting = "Hello";

    return greeting;
}
/* But local array doesn't! */
/*
 * Counter-example - do not copy. Returns the address of an automatic
 * array. The array's lifetime ends when the function returns, so the
 * caller receives a dangling pointer and any use of it is undefined
 * behavior.
 */
char* bad_function(void) {
char local[] = "Hello"; // Automatic array holding a copy of the literal
return local; // WRONG: local destroyed after return
}
/* String literal deduplication */
char *s1 = "Hello";
char *s2 = "Hello";
if (s1 == s2) {
printf("Same address!\n"); // May print: identical literals may share storage (compiler-dependent)
}
/* Different arrays */
char a1[] = "Hello";
char a2[] = "Hello";
if (a1 == a2) {
printf("Same address\n"); // Never prints (different arrays)
}
/* String literal concatenation */
char *msg1 = "Hello" " " "World"; // Adjacent literals are concatenated at compile time
printf("%s\n", msg1); // Hello World
char *msg2 = "This is a long string that "
"spans multiple lines"; // Same mechanism: one literal split across source lines
printf("%s\n", msg2);
/* Escape sequences in strings */
char *special = "Line 1\nLine 2\tTabbed\n\0Hidden";
printf("%s", special); // Prints: Line 1
// Line 2 Tabbed
// (Hidden part not printed due to \0)
/* Wide strings (for Unicode) */
// wchar_t and the wide-character functions are declared in <wchar.h>
wchar_t *wide = L"Hello, 世界";
// Requires special functions: wprintf, wcslen, etc.
Common String Mistakes
String handling is error-prone in C. Understanding common mistakes helps you avoid crashes, security vulnerabilities, and subtle bugs.
/* Each numbered mistake below is an independent snippet: names such as
   buf, str, str2, s1 and s2 are reused from one snippet to the next. */
/* Mistake 1: Buffer overflow */
char small[5];
// strcpy(small, "Hello World"); // OVERFLOW!
/* Fix: Check size or use strncpy */
strncpy(small, "Hello World", sizeof(small) - 1); // Copies only "Hell" (4 chars): silent truncation
small[sizeof(small) - 1] = '\0'; // Ensure termination
/* Mistake 2: Forgetting null terminator */
char buf[10];
strncpy(buf, "HelloWorld", 10); // 10 chars fill all 10 bytes: no room for \0!
// buf[10] = '\0'; // Out of bounds!
/* Fix: Leave room */
strncpy(buf, "HelloWorld", 9); // Copies "HelloWorl" (truncated)
buf[9] = '\0';
/* Mistake 3: Comparing strings with == */
char *s1 = "Hello";
char *s2 = "Hello";
if (s1 == s2) { // Compares ADDRESSES not content!
// May or may not execute
}
/* Fix: Use strcmp */
if (strcmp(s1, s2) == 0) { // Correct: Compares content
printf("Strings are equal\n");
}
/* Mistake 4: Modifying string literals */
char *str = "Hello";
// str[0] = 'h'; // UNDEFINED BEHAVIOR (usually a crash)!
/* Fix: Use array */
char str2[] = "Hello";
str2[0] = 'h'; // OK
/* Mistake 5: Not checking strlen before indexing */
char empty[] = "";
// char last = empty[strlen(empty) - 1]; // WRONG: 0 - 1 wraps to SIZE_MAX (size_t is unsigned)
/* Fix: Check first */
size_t len = strlen(empty);
if (len > 0) {
char last = empty[len - 1];
}
/* Mistake 6: scanf without size limit */
char name[20];
// scanf("%s", name); // Can overflow
/* Fix: Limit input */
scanf("%19s", name); // Width is the array size minus 1, leaving room for \0
/* Mistake 7: Using uninitialized buffer */
char buf[100];
// strcat(buf, "Hello"); // WRONG: buf uninitialized, so strcat cannot find its end
/* Fix: Initialize first */
buf[0] = '\0';
strcat(buf, "Hello"); // OK
/* Mistake 8: Off-by-one in allocation */
char *str = malloc(5); // For "Hello"
// strcpy(str, "Hello"); // OVERFLOW: Need 6 bytes!
/* Fix: Include \0 */
char *str2 = malloc(6);
strcpy(str2, "Hello"); // OK here; real code must first check str2 != NULL and later free() it
/* Mistake 9: Mixing strlen and sizeof */
char arr[] = "Hello";
printf("strlen: %zu\n", strlen(arr)); // 5
printf("sizeof: %zu\n", sizeof(arr)); // 6
char *ptr = arr;
printf("sizeof ptr: %zu\n", sizeof(ptr)); // sizeof(char *): 8 on typical 64-bit targets (pointer size!)
/* Mistake 10: Not handling scanf return value */
char input[50];
// scanf("%s", input); // What if EOF or error? (Also unbounded: see Mistake 6)
/* Fix: Check return (scanf returns the number of items it assigned) */
if (scanf("%49s", input) == 1) {
    printf("Got: %s\n", input);
} else {
    printf("Input error\n");
}
Best Practices
Following string best practices makes C code safer and more reliable. These guidelines come from decades of experience dealing with string-related bugs and security issues.
/* Practice 1: Always use const for read-only strings */
/*
 * Writes msg followed by a newline to stdout. The parameter is
 * const-qualified because the string is only read, never modified.
 */
void print_message(const char *msg)
{
    printf("%s\n", msg);
}
/* Practices 2-8 are fragments: dest, src, size, str1, str2 and value are
   assumed to be declared by the surrounding code. */
/* Practice 2: Use sizeof for buffer sizes */
char buffer[100];
fgets(buffer, sizeof(buffer), stdin); // Not hardcoded 100 (works because buffer is an array, not a pointer)
/* Practice 3: Prefer fgets over gets/scanf */
char name[50];
fgets(name, sizeof(name), stdin); // Safe
// gets(name); // NEVER USE (removed in C11)
// scanf("%s", name); // Unsafe without size limit
/* Practice 4: Always null-terminate after strncpy */
strncpy(dest, src, size - 1); // size is the capacity of dest and must be > 0
dest[size - 1] = '\0';
/* Practice 5: Check string lengths before operations */
if (strlen(str1) + strlen(str2) < sizeof(buffer)) { // < (not <=) leaves one byte for \0
strcpy(buffer, str1);
strcat(buffer, str2);
}
/* Practice 6: Use snprintf instead of sprintf */
char buf[50];
// sprintf(buf, "%s: %d", name, value); // Can overflow
snprintf(buf, sizeof(buf), "%s: %d", name, value); // Safe: output is truncated to fit and always terminated
/* Practice 7: Initialize strings */
char str[100] = {0}; // All zeros
// Or:
char str2[100] = ""; // Empty string (the remaining elements are zeroed as well)
/* Practice 8: Document buffer sizes in comments */
char username[32]; // Max 31 chars + \0
char password[64]; // Max 63 chars + \0
/* Practice 9: Create safe wrapper functions */
/*
 * Bounded string length: counts the characters before the first '\0',
 * examining at most maxlen bytes.
 *
 * Returns 0 for a null pointer; otherwise the smaller of the string's
 * length and maxlen.
 */
size_t safe_strlen(const char *str, size_t maxlen)
{
    size_t count = 0;

    if (str != NULL) {
        for (const char *p = str; count < maxlen && *p != '\0'; p++) {
            count++;
        }
    }
    return count;
}
/* Practice 10: Use size_t for string lengths */
size_t len = strlen(str); // Not int: strlen returns size_t, and an int could truncate or go negative for a very long string
/* Practice 11: Validate string pointers */
/*
 * Template for a string consumer: the pointer is validated before the
 * string is touched. A null str is silently ignored.
 */
void process_string(const char *str)
{
    if (!str) {
        return; // Or handle error
    }

    // Process str...
}
/* Practice 12: Be explicit about string lifetime */
// This string persists:
/*
 * Returns a fixed, read-only error description. The text has static
 * storage duration, so the pointer remains valid after the call.
 */
const char *get_error_msg(void)
{
    static const char text[] = "Error occurred";

    return text;
}
// This string is copied:
/*
 * Copies src into dest, truncating if necessary, and always leaves dest
 * null-terminated.
 *
 * dest: destination buffer; the call does nothing when it is null
 * size: capacity of dest in bytes; when 0 nothing is written
 * src:  string to copy; a null src produces an empty string in dest
 */
void copy_string(char *dest, size_t size, const char *src) {
    if (dest == NULL || size == 0) {
        return; // size - 1 would wrap to SIZE_MAX and overrun dest
    }
    if (src == NULL) {
        dest[0] = '\0';
        return;
    }
    strncpy(dest, src, size - 1);
    dest[size - 1] = '\0'; // strncpy does not terminate when src is too long
}
Summary & What's Next
Key Takeaways:
- ✅ Strings are null-terminated character arrays
- ✅ '\0' marks the end of strings
- ✅ String literals are read-only
- ✅ char[] creates modifiable copy
- ✅ Always ensure null termination
- ✅ Use fgets, not gets or unsafe scanf
- ✅ Compare with strcmp, not ==
- ✅ Use sizeof for buffer sizes, not hardcoded numbers