C语言字符串指针深度解析
一、C语言字符串基础:字符序列与空终止符
C语言并未像许多高级语言那样内置字符串类型,而是借助字符数组与空字符'\0'来模拟字符串。字符串事实上就是以'\0'结尾的字符序列。这种设计简单高效,但也给开发者带来了许多需要小心处理的内存细节。
可以把字符串想象成一列火车:
- 每一个字符是一节车厢
- 结束标记'\0'是列车的尾灯
- 指针则类似于时刻表,指向列车的位置
char str[] = "Hello"; // memory layout: 'H','e','l','l','o','\0' -- six bytes, the terminator is appended automatically
二、字符串的多种表示与内存布局
2.1 三种主要表示法
#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Prints the same text held three different ways -- a character array, a
// pointer to a string literal, and a heap buffer -- followed by a summary of
// where each one is stored.
void show_string_ways() {
    // Way 1: character array. The literal's bytes (terminator included) are
    // copied into the array, so sizeof reports the full 12 bytes.
    char s1[] = "Hello World";
    printf("字符数组: %s, 大小: %zu 字节\n", s1, sizeof(s1));

    // Way 2: pointer to a string literal. Declared const because writing
    // through a pointer to a literal is undefined behavior; sizeof here is
    // the size of the pointer, not of the text.
    const char *s2 = "Hello World";
    printf("指针字面量: %s, 指针大小: %zu 字节\n", s2, sizeof(s2));

    // Way 3: heap allocation. The size is derived from the array instead of
    // a hard-coded 12 so the buffer always fits the text plus '\0'.
    char *s3 = malloc(sizeof(s1));
    if (s3) {
        strcpy(s3, s1);
        printf("动态分配: %s\n", s3);
        free(s3);
    }

    printf("\n存储区域对比:\n");
    printf("s1: 栈区,可修改\n");
    printf("s2: 常量区,通常不可修改\n");
    printf("s3: 堆区,可修改,需手动释放\n");
}
2.2 空终止符'\0'的必要性
#include <stdio.h>
#include <string.h>
// Shows why the '\0' terminator matters by contrasting a proper string, an
// array that has no room for a terminator, and an explicitly terminated array.
void null_demo() {
    // A string-literal initializer appends '\0' automatically.
    char a[] = "Hi";
    printf("有效字符串长度: %zu\n", strlen(a));

    // Three characters in a three-element array: there is no terminator.
    char b[3] = {'H','i','!'};
    // Calling strlen(b) would read past the end of the array (undefined
    // behavior), so the scan is bounded by the array size instead. With no
    // terminator present the result is simply sizeof(b).
    size_t bounded_len = 0;
    while (bounded_len < sizeof(b) && b[bounded_len] != '\0') {
        bounded_len++;
    }
    printf("潜在危险,strlen可能异常:%zu\n", bounded_len);

    // Reserving one extra element for '\0' makes the array a valid string.
    char c[4] = {'H','i','!','\0'};
    printf("安全字符串: %s, 长度: %zu\n", c, strlen(c));
}
三、字符指针的高级操作
3.1 初始化与可变性
// Contrasts three pointer targets: a read-only literal, a writable local
// array, and a writable heap buffer.
void pointer_init() {
    // Target 1: string literal. Writing through it (e.g. literal_ptr[0] = 'X')
    // is not allowed, hence the const qualifier.
    const char *literal_ptr = "Fixed String";
    (void)literal_ptr;

    // Target 2: a local array, which can be modified through the pointer.
    char arr[] = "Changeable";
    char *array_ptr = &arr[0];
    *array_ptr = 'X';
    printf("修改后: %s\n", array_ptr);

    // Target 3: heap memory, also writable; nothing more to do if the
    // allocation fails.
    char *heap_ptr = malloc(20);
    if (heap_ptr == NULL) {
        return;
    }
    strcpy(heap_ptr, "Dynamic");
    *heap_ptr = 'D';
    printf("动态字符串: %s\n", heap_ptr);
    free(heap_ptr);
}
3.2 指针运算遍历字符串
// Walks a string with pointer arithmetic: forward traversal, offset access,
// length by pointer subtraction, and reverse traversal.
void pointer_arithmetic() {
    char str[] = "C Pointer";

    // Forward traversal: advance until the terminator is reached.
    for (const char *p = str; *p != '\0'; p++) {
        putchar(*p);
    }
    putchar('\n');

    // Offset access: *(str + 2) is the third character.
    printf("第3个字符: %c\n", *(str + 2));

    // Length: move `end` to the terminator and subtract the start address.
    const char *start = str;
    const char *end = str;
    while (*end != '\0') {
        end++;
    }
    // A pointer difference has type ptrdiff_t, whose conversion is %td.
    printf("长度: %td\n", end - start);

    // Reverse traversal: decrement before dereferencing so the pointer never
    // moves below the first element (forming such a pointer is undefined).
    for (const char *p = end; p > start; ) {
        putchar(*--p);
    }
    putchar('\n');
}
四、标准字符串函数详解
4.1 核心函数用法
// Tours the core <string.h> routines: strlen, bounded copy and append,
// strcmp, strchr, strstr and strtok, printing the result of each.
void std_funcs() {
    char src[] = "Hello";
    char dst[20];

    // strlen: number of characters before the terminator.
    size_t src_len = strlen(src);
    printf("长度: %zu\n", src_len);

    // Bounded copy: strncpy does not terminate on truncation, so the last
    // byte of the destination is set explicitly.
    const size_t dst_last = sizeof(dst) - 1;
    strncpy(dst, src, dst_last);
    dst[dst_last] = '\0';
    printf("复制结果: %s\n", dst);

    // Bounded append: the limit is the space left after the current
    // contents, minus one byte for the terminator.
    char buf[30] = "Hello";
    size_t room = sizeof(buf) - strlen(buf) - 1;
    strncat(buf, " World", room);
    printf("连接后: %s\n", buf);

    // strcmp: sign of the result gives the ordering.
    printf("比较结果: %d\n", strcmp("apple", "banana"));

    // strchr: first occurrence of a character, or NULL.
    char *pos = strchr("example", 'm');
    if (pos != NULL) {
        printf("找到字符: %c\n", *pos);
    }

    // strstr: first occurrence of a substring, or NULL.
    char *sub = strstr("C language", "lang");
    if (sub != NULL) {
        printf("子串位置: %s\n", sub);
    }

    // strtok: splits in place, so it needs a writable array.
    char data[] = "a,b,c,d";
    for (char *token = strtok(data, ","); token != NULL; token = strtok(NULL, ",")) {
        printf("令牌: %s\n", token);
    }
}
4.2 手动实现常用函数
// Hand-written strlen: returns the number of characters in `s` that precede
// the terminating '\0'.
size_t my_len(const char *s) {
    size_t n = 0;
    while (s[n] != '\0') {
        n++;
    }
    return n;
}
// Hand-written strcpy: copies `s`, terminator included, into `d` and returns
// `d`. The caller must provide a destination large enough for the copy.
char* my_cpy(char *d, const char *s) {
    size_t i = 0;
    // Copy first, test afterwards, so the '\0' itself is also transferred.
    do {
        d[i] = s[i];
    } while (s[i++] != '\0');
    return d;
}
// Hand-written strcmp: returns the difference between the first pair of
// differing characters (compared as unsigned char), or 0 if the strings are
// equal.
int my_cmp(const char *s1, const char *s2) {
    const unsigned char *a = (const unsigned char *)s1;
    const unsigned char *b = (const unsigned char *)s2;
    for (; *a != '\0'; a++, b++) {
        if (*a != *b) {
            break;
        }
    }
    return *a - *b;
}
// Exercises my_len, my_cpy and my_cmp once each and prints their results.
void test_my_funcs() {
    char copy_target[20];

    size_t measured = my_len("test");
    printf("自定义strlen: %zu\n", measured);

    my_cpy(copy_target, "hello");
    printf("自定义strcpy: %s\n", copy_target);

    int order = my_cmp("abc", "abd");
    printf("自定义strcmp: %d\n", order);
}
五、字符串数组与指针数组
5.1 二维数组 vs 指针数组
// Compares three ways to hold several strings: a 2D char array, an array of
// pointers to literals, and a heap-allocated array of heap strings. Prints
// the sizeof of the first two.
void compare_arrays() {
    enum { PERSON_COUNT = 3, NAME_CAP = 20 };

    // 2D array: every row reserves the full 20 bytes; rows are writable.
    char names2d[3][20] = {"Alice", "Bob", "Charlie"};
    printf("2D数组大小: %zu 字节\n", sizeof(names2d));
    names2d[0][0] = 'A';

    // Pointer array: only pointers are stored. The targets are string
    // literals and must not be modified, hence const.
    const char *namesPtr[] = {"Alice", "Bob", "Charlie"};
    printf("指针数组大小: %zu 字节\n", sizeof(namesPtr));

    // Heap version: one allocation for the pointer table, one per string.
    char **dynamic = malloc(PERSON_COUNT * sizeof *dynamic);
    if (dynamic == NULL) {
        return;
    }
    for (int i = 0; i < PERSON_COUNT; i++) {
        dynamic[i] = malloc(NAME_CAP);
        // Skip formatting when the allocation failed; snprintf bounds the
        // write to the buffer that was actually obtained.
        if (dynamic[i] != NULL) {
            snprintf(dynamic[i], NAME_CAP, "Person %d", i + 1);
        }
    }

    // free(NULL) is a no-op, so failed slots need no special case.
    for (int i = 0; i < PERSON_COUNT; i++) {
        free(dynamic[i]);
    }
    free(dynamic);
}
5.2 命令行参数处理
// Prints the program name, then classifies every remaining command-line
// argument as an option (leading '-') or a plain argument.
int main(int argc, char *argv[]) {
    printf("程序名: %s\n", argv[0]);

    for (char **arg = argv + 1; arg < argv + argc; arg++) {
        if (**arg != '-') {
            printf("参数: %s\n", *arg);
            continue;
        }
        printf("选项: %s\n", *arg);
    }
    return 0;
}
六、内存管理与常见陷阱
6.1 典型错误与对策
// Walks through five classic string/memory mistakes, showing the safe
// counterpart of each. The unsafe variants are described in comments only.
void error_examples() {
    // Pitfall 1: buffer overflow. An unbounded strcpy of "Too long" into a
    // 5-byte array would overrun it; bound the copy and terminate manually.
    char small[5];
    const size_t small_last = sizeof(small) - 1;
    strncpy(small, "Too long", small_last);
    small[small_last] = '\0';

    // Pitfall 2: copying through an uninitialized pointer is undefined
    // behavior; give the pointer real storage first.
    char *p = malloc(10);
    if (p != NULL) {
        strcpy(p, "data");
        printf("%s\n", p);
        free(p);
    }

    // Pitfall 3: writing to a string literal through a char * fails at run
    // time; nothing to execute here.

    // Pitfall 4: memory leak. Every allocation needs a matching free;
    // clearing the pointer afterwards prevents accidental reuse.
    char *leak = malloc(100);
    free(leak);
    leak = NULL;

    // Pitfall 5: dangling pointer. After free the memory must not be read;
    // reset the pointer instead of using it.
    char *dangling = malloc(20);
    free(dangling);
    dangling = NULL;
}
6.2 安全字符串函数(参考C11 Annex K思路的手写实现)
#define __STDC_WANT_LIB_EXT1__ 1
#include <stdio.h>
#include <string.h>
// Copies `src` into `dest`, writing at most `size` bytes including the
// terminator, so the result is always a valid (possibly truncated) string.
//
// dest: destination buffer of at least `size` bytes; NULL is ignored.
// size: capacity of `dest` in bytes; 0 means nothing may be written.
// src:  source string; NULL is treated as an empty string.
void safe_copy(char *dest, size_t size, const char *src) {
    // With size == 0 the expression `size - 1` would wrap to SIZE_MAX and
    // turn the loop into an unbounded copy, so bail out first.
    if (dest == NULL || size == 0) {
        return;
    }

    size_t i = 0;
    if (src != NULL) {
        // `i + 1 < size` keeps one byte in reserve for the terminator.
        while (i + 1 < size && src[i] != '\0') {
            dest[i] = src[i];
            i++;
        }
    }
    dest[i] = '\0';
}
// Appends `src` to the string already in `dest` without writing past
// `size` bytes; the result is truncated if necessary and always terminated.
//
// dest: buffer of `size` bytes holding a string; NULL is ignored.
// size: total capacity of `dest` in bytes.
// src:  string to append; NULL is ignored.
//
// If `dest` contains no terminator within its first `size` bytes there is no
// room to append or terminate safely, so the buffer is left untouched.
void safe_concat(char *dest, size_t size, const char *src) {
    if (dest == NULL || src == NULL || size == 0) {
        return;
    }

    // Bounded length of the existing contents.
    size_t len = 0;
    while (len < size && dest[len] != '\0') {
        len++;
    }
    // len == size means no terminator was found; computing `size - len - 1`
    // here would wrap around and allow an out-of-bounds write.
    if (len == size) {
        return;
    }

    size_t i = 0;
    // `len + i + 1 < size` keeps one byte in reserve for the terminator.
    while (len + i + 1 < size && src[i] != '\0') {
        dest[len + i] = src[i];
        i++;
    }
    dest[len + i] = '\0';
}
七、文件与字符串操作
7.1 文件读写
// Round-trips two lines of text through "demo.txt": writes them, reads them
// back line by line with the trailing newline removed, then deletes the file.
void file_string_ops() {
    const char *path = "demo.txt";

    // Write phase.
    FILE *out = fopen(path, "w");
    if (out != NULL) {
        const char *rows[] = {"Line 1\n", "Line 2\n"};
        for (size_t i = 0; i < sizeof(rows) / sizeof(rows[0]); i++) {
            fputs(rows[i], out);
        }
        fclose(out);
    }

    // Read phase: fgets keeps the '\n', so it is cut off before printing.
    FILE *in = fopen(path, "r");
    if (in != NULL) {
        char line[100];
        for (;;) {
            if (fgets(line, sizeof(line), in) == NULL) {
                break;
            }
            char *newline = strchr(line, '\n');
            if (newline != NULL) {
                *newline = '\0';
            }
            printf("读取: %s\n", line);
        }
        fclose(in);
    }

    remove(path);
}
7.2 CSV解析
// Reads `filename` line by line and prints every comma-separated field.
//
// filename: path of the file to read; NULL or an unopenable path is a no-op.
//
// Fields are split manually instead of with strtok, because strtok treats
// consecutive delimiters as one and would silently drop empty fields such as
// the middle one in "a,,b". Blank lines produce no output. Quoted fields are
// not interpreted. Lines longer than the 255-character buffer are processed
// in several pieces.
void parse_csv(const char *filename) {
    if (filename == NULL) {
        return;
    }
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        return;
    }

    char line[256];
    while (fgets(line, sizeof(line), fp)) {
        // Cut at the first CR or LF so CRLF files do not leave a stray '\r'
        // in the last field.
        line[strcspn(line, "\r\n")] = '\0';
        if (line[0] == '\0') {
            continue;
        }

        char *field = line;
        for (;;) {
            char *comma = strchr(field, ',');
            if (comma != NULL) {
                *comma = '\0';
            }
            printf("字段: %s\n", field);
            if (comma == NULL) {
                break;
            }
            field = comma + 1;
        }
    }
    fclose(fp);
}
八、搜索与替换实现
// Case-insensitive search: returns 1 if `pattern` occurs anywhere in `text`
// when letter case is ignored, 0 otherwise.
//
// text, pattern: NUL-terminated strings; a NULL argument yields 0.
// An empty pattern matches any text.
//
// The previous version only compared from the first character of `text`, so
// a pattern that appeared later was reported as missing. Characters are cast
// to unsigned char before tolower, since passing a negative char value to
// the <ctype.h> functions is undefined behavior.
int case_insensitive_find(const char *text, const char *pattern) {
    if (text == NULL || pattern == NULL) {
        return 0;
    }
    if (*pattern == '\0') {
        return 1;
    }

    for (const char *start = text; *start != '\0'; start++) {
        const char *t = start;
        const char *p = pattern;
        while (*t != '\0' && *p != '\0' &&
               tolower((unsigned char)*t) == tolower((unsigned char)*p)) {
            t++;
            p++;
        }
        if (*p == '\0') {
            return 1;
        }
        // The text ran out while still matching: what remains is shorter
        // than the pattern, so no later starting point can succeed.
        if (*t == '\0') {
            return 0;
        }
    }
    return 0;
}
// Returns a newly allocated copy of `str` in which every non-overlapping
// occurrence of `old` is replaced by `new`.
//
// str: input string (not modified).
// old: substring to search for; if empty, the result is a plain copy of
//      `str` (the previous version looped forever in that case).
// new: replacement text; may be empty to delete occurrences.
//
// Returns NULL if any argument is NULL, if the result size would overflow,
// or if allocation fails. The caller owns the result and must free() it.
char* global_replace(const char *str, const char *old, const char *new) {
    if (str == NULL || old == NULL || new == NULL) {
        return NULL;
    }

    // Lengths are computed once rather than on every loop iteration.
    const size_t str_len = strlen(str);
    const size_t old_len = strlen(old);
    const size_t rep_len = strlen(new);

    // Pass 1: count occurrences. An empty `old` matches at every position
    // without advancing, so it is treated as "nothing to replace".
    size_t count = 0;
    if (old_len != 0) {
        for (const char *scan = str; (scan = strstr(scan, old)) != NULL; scan += old_len) {
            count++;
        }
    }

    // Size of the result: untouched bytes + replacements + terminator.
    const size_t kept = str_len - count * old_len;
    if (rep_len != 0 && count > ((size_t)-1 - kept - 1) / rep_len) {
        return NULL;
    }
    char *result = malloc(kept + count * rep_len + 1);
    if (result == NULL) {
        return NULL;
    }

    // Pass 2: copy the text between matches, then the replacement.
    char *cur = result;
    const char *start = str;
    if (old_len != 0) {
        const char *found;
        while ((found = strstr(start, old)) != NULL) {
            const size_t gap = (size_t)(found - start);
            memcpy(cur, start, gap);
            cur += gap;
            memcpy(cur, new, rep_len);
            cur += rep_len;
            start = found + old_len;
        }
    }
    // Tail after the last match, terminator included.
    strcpy(cur, start);
    return result;
}
九、简易文本编辑器核心
// A growable list of text lines. Every stored line is a separately
// allocated, NUL-terminated copy owned by the editor.
typedef struct {
    char **lines;        // array of `capacity` slots; the first `count` are in use
    int count, capacity;
} Editor;

// Returns a heap copy of `text`, or NULL on allocation failure. Used instead
// of strdup, which is POSIX rather than ISO C11.
static char *editor_dup(const char *text) {
    const size_t bytes = strlen(text) + 1;
    char *copy = malloc(bytes);
    if (copy != NULL) {
        memcpy(copy, text, bytes);
    }
    return copy;
}

// Makes sure at least one free slot exists, doubling the capacity when the
// array is full. Returns 1 on success; on failure returns 0 and leaves the
// editor unchanged (the old array stays valid because the realloc result is
// only stored after it is known to be non-NULL).
static int editor_reserve(Editor *e) {
    if (e->count < e->capacity) {
        return 1;
    }
    if (e->capacity > INT_MAX / 2) {
        return 0;
    }
    // A capacity of 0 would stay 0 when doubled, so start from 1.
    const int new_cap = e->capacity > 0 ? e->capacity * 2 : 1;
    if ((size_t)new_cap > SIZE_MAX / sizeof *e->lines) {
        return 0;
    }
    char **grown = realloc(e->lines, (size_t)new_cap * sizeof *grown);
    if (grown == NULL) {
        return 0;
    }
    e->lines = grown;
    e->capacity = new_cap;
    return 1;
}

// Creates an empty editor with room for `cap` lines (values below 1 are
// raised to 1). Returns NULL if allocation fails. Release with free_editor.
Editor* create_editor(int cap) {
    if (cap < 1) {
        cap = 1;
    }
    Editor *e = malloc(sizeof *e);
    if (e == NULL) {
        return NULL;
    }
    e->lines = malloc((size_t)cap * sizeof *e->lines);
    if (e->lines == NULL) {
        free(e);
        return NULL;
    }
    e->capacity = cap;
    e->count = 0;
    return e;
}

// Appends a copy of `text` as the last line. Returns 1 on success, 0 on a
// NULL argument or allocation failure; on failure the editor is unchanged
// (previously a failed copy was still counted as a line).
int add_line(Editor *e, const char *text) {
    if (e == NULL || text == NULL) {
        return 0;
    }
    char *copy = editor_dup(text);
    if (copy == NULL) {
        return 0;
    }
    if (!editor_reserve(e)) {
        free(copy);
        return 0;
    }
    e->lines[e->count++] = copy;
    return 1;
}

// Inserts a copy of `text` so that it becomes line `pos` (0 <= pos <= count);
// later lines shift down by one. Returns 1 on success, 0 on an invalid
// position, a NULL argument, or allocation failure; on failure the editor is
// unchanged.
int insert_line(Editor *e, int pos, const char *text) {
    if (e == NULL || text == NULL) {
        return 0;
    }
    if (pos < 0 || pos > e->count) {
        return 0;
    }
    // Allocate everything before touching the array so a failure cannot
    // leave a hole behind.
    char *copy = editor_dup(text);
    if (copy == NULL) {
        return 0;
    }
    if (!editor_reserve(e)) {
        free(copy);
        return 0;
    }
    for (int i = e->count; i > pos; i--) {
        e->lines[i] = e->lines[i - 1];
    }
    e->lines[pos] = copy;
    e->count++;
    return 1;
}

// Frees every stored line, the line array, and the editor itself.
// Passing NULL is a no-op.
void free_editor(Editor *e) {
    if (e == NULL) {
        return;
    }
    for (int i = 0; i < e->count; i++) {
        free(e->lines[i]);
    }
    free(e->lines);
    free(e);
}
十、核心要点总结
10.1 关键概念
- 字符串本质:以'\0'结尾的字符数组
- 指针指向:可以指向数组、字面量或堆内存
- 存储区域:栈、堆、常量区各有特点
- 安全性:边界检查与内存管理至关重要
10.2 最佳实践
- 使用 `strncpy`、`strncat` 替代不安全版本
- 用 `const` 修饰只读字符串
- 动态内存分配后及时释放并置空指针
- 确保字符串以'\0'正确终止
- 注意多字节字符编码(如UTF-8)