String Manipulation
Master advanced string manipulation techniques - custom string operations, parsing, validation, transformation, and building your own safe string utilities for real-world C programming.
Building Safe String Utilities
Creating your own string utilities helps avoid common pitfalls with standard library functions. Safe wrappers check bounds, handle edge cases, and provide clearer APIs for your codebase.
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Copy src into dest only when the whole string, terminator included, fits.
 *
 *   dest      - destination buffer
 *   dest_size - capacity of dest in bytes
 *   src       - NUL-terminated string to copy
 *
 * Returns 0 on success. Returns -1 and leaves dest untouched when either
 * pointer is NULL, dest_size is 0, or src needs more than dest_size bytes.
 */
int str_copy(char *dest, size_t dest_size, const char *src) {
    if (!dest || !src || !dest_size) {
        return -1;
    }

    size_t needed = strlen(src) + 1;  /* characters plus the terminator */
    if (needed > dest_size) {
        return -1;  /* Not enough space */
    }

    memcpy(dest, src, needed);
    return 0;
}
/*
 * Append src to the string already held in dest, only if the result fits.
 *
 *   dest      - buffer holding a NUL-terminated string
 *   dest_size - capacity of dest in bytes
 *   src       - NUL-terminated string to append
 *
 * Returns 0 on success. Returns -1 and leaves dest untouched when a pointer
 * is NULL, dest_size is 0, dest has no terminator within its first dest_size
 * bytes, or the combined string plus terminator does not fit.
 */
int str_append(char *dest, size_t dest_size, const char *src) {
    if (dest == NULL || src == NULL || dest_size == 0) {
        return -1;
    }

    /* Bounded search: never read past dest_size even if dest is unterminated */
    const char *terminator = memchr(dest, '\0', dest_size);
    if (terminator == NULL) {
        return -1;
    }

    size_t dest_len = (size_t)(terminator - dest);
    size_t src_len = strlen(src);

    /* dest_len < dest_size here, so the subtraction cannot wrap */
    if (src_len >= dest_size - dest_len) {
        return -1;  /* Not enough space */
    }

    memcpy(dest + dest_len, src, src_len + 1);
    return 0;
}
/*
 * printf-style formatting into dest that reports truncation as an error.
 *
 *   dest      - destination buffer
 *   dest_size - capacity of dest in bytes
 *   format    - printf-style format string, followed by its arguments
 *
 * Returns 0 when the complete output, terminator included, fit in dest.
 * Returns -1 when dest or format is NULL, dest_size is 0, vsnprintf reports
 * an error, or the output was truncated.
 */
int str_format(char *dest, size_t dest_size, const char *format, ...) {
    /* Same argument validation as str_copy / str_append */
    if (dest == NULL || format == NULL || dest_size == 0) {
        return -1;
    }

    va_list args;
    va_start(args, format);
    int result = vsnprintf(dest, dest_size, format, args);
    va_end(args);

    /* vsnprintf returns the length it wanted to write; >= size means truncated */
    if (result < 0 || (size_t)result >= dest_size) {
        return -1;  /* Error or truncated */
    }
    return 0;
}
/* Usage: each helper returns 0 on success, so print only after a 0 result */
void safe_string_examples(void) {
    char text[50];
    int status;

    status = str_copy(text, sizeof(text), "Hello");
    if (status == 0) {
        printf("%s\n", text);  /* Hello */
    }

    status = str_append(text, sizeof(text), " World");
    if (status == 0) {
        printf("%s\n", text);  /* Hello World */
    }

    status = str_format(text, sizeof(text), "%s: %d", "Count", 42);
    if (status == 0) {
        printf("%s\n", text);  /* Count: 42 */
    }
}
String Trimming and Padding
Trimming removes whitespace from string ends. Padding adds characters to reach a target length. These operations are common in text processing, parsing user input, and formatting output.
/*
 * Skip leading whitespace.
 *
 * Returns a pointer to the first non-whitespace character of str (or to its
 * terminator if there is none). The string itself is not modified.
 */
char* trim_left(char *str) {
    char *cursor = str;
    for (; isspace((unsigned char)*cursor); ++cursor) {
        /* advance only */
    }
    return cursor;
}
/*
 * Remove trailing whitespace in place by writing a new terminator.
 *
 * A NULL or empty str is left alone. Works with a length index instead of a
 * pointer that walks backwards, so an all-whitespace string never produces a
 * pointer before the start of the array.
 */
void trim_right(char *str) {
    if (str == NULL) {
        return;
    }

    size_t len = strlen(str);
    if (len == 0) {
        return;
    }

    while (len > 0 && isspace((unsigned char)str[len - 1])) {
        len--;
    }
    str[len] = '\0';
}
/*
 * Trim both sides.
 *
 * Trailing whitespace is cut off in place; leading whitespace is skipped, so
 * the returned pointer may lie inside str rather than at its start.
 */
char* trim(char *str) {
    char *first = trim_left(str);
    trim_right(first);
    return first;
}
/*
 * Trim both sides and keep the result at the original address, so the
 * caller's pointer stays valid for the trimmed text.
 */
void trim_inplace(char *str) {
    char *body = trim(str);
    if (body == str) {
        return;  /* nothing was skipped at the front */
    }

    /* Source and destination overlap, hence memmove */
    size_t bytes = strlen(body) + 1;
    memmove(str, body, bytes);
}
/*
 * Left pad with character: write src into dest, right-aligned, filling the
 * front with pad_char so the result is exactly dest_size - 1 characters.
 *
 *   dest      - destination buffer
 *   dest_size - capacity of dest in bytes (terminator included)
 *   src       - NUL-terminated string to pad
 *   pad_char  - fill character
 *
 * If src is already dest_size - 1 characters or longer, its first
 * dest_size - 1 characters are copied and no padding is added.
 * Does nothing when dest or src is NULL or dest_size is 0.
 */
void pad_left(char *dest, size_t dest_size, const char *src, char pad_char) {
    /* dest_size == 0 would make dest_size - 1 wrap around to SIZE_MAX */
    if (dest == NULL || src == NULL || dest_size == 0) {
        return;
    }

    size_t width = dest_size - 1;
    size_t src_len = strlen(src);

    if (src_len >= width) {
        memcpy(dest, src, width);
        dest[width] = '\0';
        return;
    }

    size_t padding = width - src_len;
    memset(dest, pad_char, padding);
    memcpy(dest + padding, src, src_len + 1);
}
/*
 * Right pad with character: write src into dest, left-aligned, filling the
 * tail with pad_char so the result is exactly dest_size - 1 characters.
 *
 *   dest      - destination buffer
 *   dest_size - capacity of dest in bytes (terminator included)
 *   src       - NUL-terminated string to pad
 *   pad_char  - fill character
 *
 * If src is already dest_size - 1 characters or longer, its first
 * dest_size - 1 characters are copied and no padding is added.
 * Does nothing when dest or src is NULL or dest_size is 0.
 */
void pad_right(char *dest, size_t dest_size, const char *src, char pad_char) {
    /* dest_size == 0 would make dest_size - 1 wrap around to SIZE_MAX */
    if (dest == NULL || src == NULL || dest_size == 0) {
        return;
    }

    size_t width = dest_size - 1;
    size_t src_len = strlen(src);

    if (src_len >= width) {
        memcpy(dest, src, width);
        dest[width] = '\0';
        return;
    }

    memcpy(dest, src, src_len);
    memset(dest + src_len, pad_char, width - src_len);
    dest[width] = '\0';
}
/* Usage examples: trimming in place, then padding into a 20-byte buffer */
void trim_pad_examples(void) {
    char greeting[] = " Hello ";
    trim_inplace(greeting);
    printf("'%s'\n", greeting);  /* 'Hello' */

    char field[20];

    pad_left(field, sizeof(field), "42", '0');
    printf("'%s'\n", field);  /* "42" preceded by '0' fill, 19 characters in all */

    pad_right(field, sizeof(field), "Name", '.');
    printf("'%s'\n", field);  /* "Name" followed by '.' fill, 19 characters in all */
}
String Case Conversion
Converting case is common in user input handling, normalization, and case-insensitive operations. Always cast each char to unsigned char before passing it to a ctype function: passing a negative value other than EOF is undefined behavior.
/* Convert every character of str to uppercase, in place */
void str_toupper(char *str) {
    for (char *p = str; *p != '\0'; ++p) {
        /* ctype functions need an unsigned char value */
        *p = (char)toupper((unsigned char)*p);
    }
}
/* Convert every character of str to lowercase, in place */
void str_tolower(char *str) {
    for (char *p = str; *p != '\0'; ++p) {
        /* ctype functions need an unsigned char value */
        *p = (char)tolower((unsigned char)*p);
    }
}
/*
 * Title case, in place: the first character after any run of whitespace (and
 * the very first character) is uppercased, every other non-space character
 * is lowercased.
 */
void str_totitle(char *str) {
    int at_word_start = 1;

    for (char *p = str; *p != '\0'; ++p) {
        unsigned char ch = (unsigned char)*p;

        if (isspace(ch)) {
            at_word_start = 1;
            continue;
        }

        *p = (char)(at_word_start ? toupper(ch) : tolower(ch));
        at_word_start = 0;
    }
}
/*
 * Sentence case, in place: the first alphabetic character is uppercased and
 * every other character is passed through tolower.
 */
void str_tosentence(char *str) {
    int capitalized = 0;

    for (char *p = str; *p != '\0'; ++p) {
        unsigned char ch = (unsigned char)*p;

        if (!capitalized && isalpha(ch)) {
            *p = (char)toupper(ch);
            capitalized = 1;
        } else {
            *p = (char)tolower(ch);
        }
    }
}
/*
 * Case-insensitive string comparison (custom).
 *
 * Compares the lowercased characters of s1 and s2 position by position.
 * Returns 0 when the strings match ignoring case; otherwise the difference
 * between the first pair of lowercased characters that differ (negative when
 * s1 sorts first, positive when s2 does).
 */
int str_cmp_nocase(const char *s1, const char *s2) {
    for (;; s1++, s2++) {
        int a = tolower((unsigned char)*s1);
        int b = tolower((unsigned char)*s2);

        /* Stop at the first mismatch, or when both strings end together */
        if (a != b || a == '\0') {
            return a - b;
        }
    }
}
/* Usage: each conversion modifies its own writable array in place */
void case_examples(void) {
    char upper_demo[] = "hello world";
    str_toupper(upper_demo);
    printf("%s\n", upper_demo);  /* HELLO WORLD */

    char lower_demo[] = "HELLO WORLD";
    str_tolower(lower_demo);
    printf("%s\n", lower_demo);  /* hello world */

    char title_demo[] = "hello world from c";
    str_totitle(title_demo);
    printf("%s\n", title_demo);  /* Hello World From C */

    char sentence_demo[] = "HELLO WORLD";
    str_tosentence(sentence_demo);
    printf("%s\n", sentence_demo);  /* Hello world */

    int difference = str_cmp_nocase("Hello", "HELLO");
    if (difference == 0) {
        printf("Equal (case-insensitive)\n");
    }
}
String Replacement and Substitution
Replacing substrings requires careful buffer management. In-place replacement is complex; typically you build a new string. Handle edge cases like overlapping matches and multiple occurrences.
/*
 * Replace first occurrence of old in src with new, writing the result to dest.
 *
 *   dest      - destination buffer (separate from src)
 *   dest_size - capacity of dest in bytes
 *   src       - input string
 *   old       - substring to look for
 *   new       - replacement text
 *
 * Returns 1 when a replacement was made, 0 when old does not occur (src is
 * copied unchanged), and -1 when the result does not fit; dest is not
 * written in the -1 case.
 */
int str_replace_first(char *dest, size_t dest_size,
                      const char *src, const char *old, const char *new) {
    const char *match = strstr(src, old);

    if (match == NULL) {
        /* No match - just copy */
        size_t whole = strlen(src);
        if (whole >= dest_size) {
            return -1;
        }
        memcpy(dest, src, whole + 1);
        return 0;
    }

    /* Result = text before the match + replacement + text after the match */
    size_t head_len = (size_t)(match - src);
    size_t repl_len = strlen(new);
    const char *tail = match + strlen(old);
    size_t tail_len = strlen(tail);

    if (head_len + repl_len + tail_len >= dest_size) {
        return -1;  /* Not enough space */
    }

    char *out = dest;
    memcpy(out, src, head_len);
    out += head_len;
    memcpy(out, new, repl_len);
    out += repl_len;
    memcpy(out, tail, tail_len + 1);  /* includes the terminator */

    return 1;  /* Number of replacements */
}
/*
 * Replace all occurrences of old in src with new, writing the result to dest.
 *
 *   dest      - destination buffer (separate from src)
 *   dest_size - capacity of dest in bytes
 *   src       - input string
 *   old       - substring to look for; must not be empty
 *   new       - replacement text
 *
 * Returns the number of replacements made (0 when old does not occur, in
 * which case src is copied unchanged). Returns -1 when a pointer is NULL,
 * old is empty, dest_size is 0, or the result does not fit.
 *
 * The output is measured in a first pass and written in a second, so dest is
 * never modified when -1 is returned.
 */
int str_replace_all(char *dest, size_t dest_size,
                    const char *src, const char *old, const char *new) {
    if (dest == NULL || src == NULL || old == NULL || new == NULL) {
        return -1;
    }

    size_t old_len = strlen(old);
    if (old_len == 0 || dest_size == 0) {
        return -1;  /* Invalid */
    }
    size_t new_len = strlen(new);

    const char *scan;
    const char *hit;

    /* Pass 1: measure. Invariant: needed < dest_size, so subtractions are safe */
    size_t needed = 0;
    scan = src;
    while ((hit = strstr(scan, old)) != NULL) {
        size_t gap = (size_t)(hit - scan);  /* text before this match */
        if (gap >= dest_size - needed) {
            return -1;
        }
        needed += gap;

        if (new_len >= dest_size - needed) {
            return -1;
        }
        needed += new_len;

        scan = hit + old_len;  /* resume after the matched text */
    }
    if (strlen(scan) >= dest_size - needed) {
        return -1;
    }

    /* Pass 2: write. Everything is known to fit */
    char *out = dest;
    int count = 0;
    scan = src;
    while ((hit = strstr(scan, old)) != NULL) {
        size_t gap = (size_t)(hit - scan);
        memcpy(out, scan, gap);
        out += gap;
        memcpy(out, new, new_len);
        out += new_len;
        scan = hit + old_len;
        count++;
    }
    strcpy(out, scan);  /* remaining text plus terminator */

    return count;
}
/* Usage: substring replacement into a scratch buffer, then a per-character swap */
void replacement_examples(void) {
    char out[200];

    /* Replace first */
    int replaced = str_replace_first(out, sizeof(out),
                                     "Hello World World", "World", "Universe");
    if (replaced > 0) {
        printf("%s\n", out);  /* Hello Universe World */
    }

    /* Replace all */
    int count = str_replace_all(out, sizeof(out),
                                "foo bar foo baz foo", "foo", "qux");
    printf("%s (replaced %d)\n", out, count);  /* qux bar qux baz qux (replaced 3) */

    /* Character replacement (simpler): same length, so it can be done in place */
    char dashed[] = "Hello-World-Test";
    for (size_t i = 0; dashed[i] != '\0'; i++) {
        if (dashed[i] == '-') {
            dashed[i] = ' ';
        }
    }
    printf("%s\n", dashed);  /* Hello World Test */
}
String Validation and Checking
Validating strings ensures data quality and prevents errors. Check for specific patterns, character types, and formats to enforce constraints on user input or data processing.
/*
 * Check if string is all digits.
 * Returns 1 when str is non-empty and every character satisfies isdigit,
 * otherwise 0 (the empty string gives 0).
 */
int str_isdigits(const char *str) {
    const char *p = str;

    /* isdigit('\0') is false, so the scan stops at the terminator at the latest */
    while (isdigit((unsigned char)*p)) {
        p++;
    }

    return p != str && *p == '\0';
}
/*
 * Check if string is all alphabetic.
 * Returns 1 when str is non-empty and every character satisfies isalpha,
 * otherwise 0 (the empty string gives 0).
 */
int str_isalpha(const char *str) {
    const char *p = str;

    /* isalpha('\0') is false, so the scan stops at the terminator at the latest */
    while (isalpha((unsigned char)*p)) {
        p++;
    }

    return p != str && *p == '\0';
}
/*
 * Check if string is alphanumeric.
 * Returns 1 when str is non-empty and every character satisfies isalnum,
 * otherwise 0 (the empty string gives 0).
 */
int str_isalnum(const char *str) {
    const char *p = str;

    /* isalnum('\0') is false, so the scan stops at the terminator at the latest */
    while (isalnum((unsigned char)*p)) {
        p++;
    }

    return p != str && *p == '\0';
}
/*
 * Check if string starts with prefix.
 * Returns 1 when every character of prefix matches the start of str
 * (an empty prefix always matches), otherwise 0.
 */
int str_startswith(const char *str, const char *prefix) {
    while (*prefix != '\0') {
        /* If str ends first, its terminator differs from the prefix character */
        if (*str++ != *prefix++) {
            return 0;
        }
    }
    return 1;
}
/*
 * Check if string ends with suffix.
 * Returns 1 when the last strlen(suffix) characters of str equal suffix
 * (an empty suffix always matches), otherwise 0.
 */
int str_endswith(const char *str, const char *suffix) {
    size_t hay_len = strlen(str);
    size_t tail_len = strlen(suffix);

    if (hay_len < tail_len) {
        return 0;  /* suffix is longer than the string itself */
    }

    const char *tail = str + (hay_len - tail_len);
    return memcmp(tail, suffix, tail_len) == 0;
}
/*
 * Check if string contains only specific characters.
 * Returns 1 when every character of str appears in charset (so the empty
 * string gives 1), otherwise 0.
 */
int str_contains_only(const char *str, const char *charset) {
    /* strspn gives the length of the leading run drawn from charset */
    size_t accepted = strspn(str, charset);
    return str[accepted] == '\0';
}
/*
 * Validate email (simple).
 *
 * Returns 1 when str has a first '@' that is not its first character, and
 * the first '.' after that '@' is neither directly after the '@' nor the
 * last character. Returns 0 otherwise. Nothing else about the address is
 * checked.
 */
int str_is_email(const char *str) {
    const char *at = strchr(str, '@');
    if (!at || at == str) {
        return 0;  /* No @ or starts with @ */
    }

    /* '@' itself is not a '.', so the search can start just after it */
    const char *dot = strchr(at + 1, '.');

    /* Reject: no '.', '.' right after '@', or '.' as the final character */
    return dot != NULL && dot != at + 1 && dot[1] != '\0';
}
/* Usage: every validator returns 1 for "yes" and 0 for "no" */
void validation_examples(void) {
    int verdict;

    /* Character-class checks */
    verdict = str_isdigits("12345");
    printf("%d\n", verdict);  /* 1 (true) */
    verdict = str_isdigits("123a5");
    printf("%d\n", verdict);  /* 0 (false) */
    verdict = str_isalpha("Hello");
    printf("%d\n", verdict);  /* 1 (true) */
    verdict = str_isalpha("Hello!");
    printf("%d\n", verdict);  /* 0 (false) */

    /* Prefix / suffix checks */
    verdict = str_startswith("Hello World", "Hello");
    printf("%d\n", verdict);  /* 1 */
    verdict = str_endswith("test.txt", ".txt");
    printf("%d\n", verdict);  /* 1 */

    /* Character-set membership */
    verdict = str_contains_only("123456", "0123456789");
    printf("%d\n", verdict);  /* 1 */
    verdict = str_contains_only("123a56", "0123456789");
    printf("%d\n", verdict);  /* 0 */

    /* Simple email shape check */
    verdict = str_is_email("user@example.com");
    printf("%d\n", verdict);  /* 1 */
    verdict = str_is_email("invalid");
    printf("%d\n", verdict);  /* 0 */
}
String Splitting and Joining
Splitting breaks strings into parts. Joining combines multiple strings. Unlike strtok, which overwrites the delimiters in its input and skips empty fields, a custom splitter can leave the original string unmodified and preserve empty fields.
/*
 * Split string into array (allocates memory).
 *
 *   str       - string to split; it is not modified
 *   delimiter - single separator character
 *   count     - out: number of parts returned, 0 on failure
 *
 * Returns a malloc'ed array of malloc'ed strings, one per field. Adjacent
 * delimiters produce empty strings, and a string with no delimiter yields a
 * single part. The caller owns the result and releases it with
 * str_split_free(parts, *count).
 *
 * Returns NULL (with *count set to 0 when count is non-NULL) if str or count
 * is NULL or an allocation fails. Uses only ISO C allocation functions.
 */
char** str_split(const char *str, char delimiter, int *count) {
    if (count == NULL) {
        return NULL;
    }
    *count = 0;
    if (str == NULL) {
        return NULL;
    }

    /* Count delimiters; a '\0' delimiter never matches, giving one part */
    int num_parts = 1;
    for (const char *p = str; *p != '\0'; p++) {
        if (*p == delimiter) {
            num_parts++;
        }
    }

    /* Zero-filled so the failure path can free every slot unconditionally */
    char **parts = calloc((size_t)num_parts, sizeof *parts);
    if (parts == NULL) {
        return NULL;
    }

    /* Split and copy */
    const char *start = str;
    for (int idx = 0; idx < num_parts; idx++) {
        /* Only the last part runs to the terminator; every earlier part is
           followed by a delimiter that the counting loop already saw */
        const char *end = (idx < num_parts - 1)
                              ? strchr(start, delimiter)
                              : start + strlen(start);
        size_t len = (size_t)(end - start);

        parts[idx] = malloc(len + 1);
        if (parts[idx] == NULL) {
            goto fail;
        }
        memcpy(parts[idx], start, len);
        parts[idx][len] = '\0';

        if (*end != '\0') {
            start = end + 1;  /* step over the delimiter */
        }
    }

    *count = num_parts;
    return parts;

fail:
    /* Cleanup on error */
    for (int i = 0; i < num_parts; i++) {
        free(parts[i]);
    }
    free(parts);
    return NULL;
}
/*
 * Free split result: releases the first count strings in parts, then the
 * array itself.
 */
void str_split_free(char **parts, int count) {
    for (int i = count; i-- > 0; ) {
        free(parts[i]);
    }
    free(parts);
}
/*
 * Join strings with delimiter.
 *
 *   parts     - array of count NUL-terminated strings
 *   count     - number of strings in parts
 *   delimiter - text placed between consecutive parts
 *
 * Returns a newly malloc'ed string that the caller must free. A count of 0
 * or less yields an empty string. Returns NULL if parts or delimiter is NULL
 * (with count > 0) or an allocation fails.
 *
 * The output is built with a moving write pointer, so each part is copied
 * once instead of rescanning the result as repeated strcat would.
 */
char* str_join(char **parts, int count, const char *delimiter) {
    if (count <= 0) {
        /* ISO C replacement for strdup("") */
        char *empty = malloc(1);
        if (empty != NULL) {
            empty[0] = '\0';
        }
        return empty;
    }
    if (parts == NULL || delimiter == NULL) {
        return NULL;
    }

    /* Calculate total length: all parts plus count - 1 delimiters */
    size_t delim_len = strlen(delimiter);
    size_t total_len = delim_len * (size_t)(count - 1);
    for (int i = 0; i < count; i++) {
        total_len += strlen(parts[i]);
    }

    /* Allocate */
    char *result = malloc(total_len + 1);
    if (result == NULL) {
        return NULL;
    }

    /* Join */
    char *out = result;
    for (int i = 0; i < count; i++) {
        if (i > 0) {
            memcpy(out, delimiter, delim_len);
            out += delim_len;
        }
        size_t len = strlen(parts[i]);
        memcpy(out, parts[i], len);
        out += len;
    }
    *out = '\0';

    return result;
}
/* Usage: split a comma-separated list, print the parts, re-join, then free */
void split_join_examples(void) {
    /* Split */
    int count;
    char **parts = str_split("apple,banana,cherry", ',', &count);
    if (parts == NULL) {
        return;
    }

    for (int i = 0; i < count; i++) {
        printf("Part %d: %s\n", i, parts[i]);
    }

    /* Join back */
    char *joined = str_join(parts, count, " | ");
    if (joined != NULL) {
        printf("Joined: %s\n", joined);  /* apple | banana | cherry */
        free(joined);
    }

    /* The split result owns its strings and must be released by the caller */
    str_split_free(parts, count);
}
Summary & What's Next
Key Takeaways:
- ✅ Build safe wrapper functions for common operations
- ✅ Always check buffer sizes before manipulation
- ✅ Trim removes whitespace, pad adds characters
- ✅ Use unsigned char with ctype.h functions
- ✅ Replace operations need careful buffer management
- ✅ Validation prevents invalid data from propagating
- ✅ Custom split/join avoids strtok limitations
- ✅ Free allocated memory from string operations