构建简易容器运行时
容器本质上是通过内核隔离机制实现的受限进程环境。本实现利用Linux命名空间、cgroups和文件系统隔离技术构建轻量级容器运行时。
核心架构
/*
 * Command-line entry point. Dispatches on the sub-command in cmd_args[1]:
 *   run  -> launch_container(cmd_args + 2)
 *   exec -> attach_container(cmd_args + 2)
 *   ps   -> lists the "containers" directory via the shell
 *
 * Returns 0 on success and 1 on a usage error, an unknown sub-command,
 * or a failed listing.
 */
int main(int argc, char **cmd_args) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s run|exec|ps [args]\n", cmd_args[0]);
        return 1;
    }

    const char *subcmd = cmd_args[1];
    const int is_run = strcmp(subcmd, "run") == 0;
    const int is_exec = strcmp(subcmd, "exec") == 0;

    if (is_run || is_exec) {
        /* Without this guard the handler would receive a list whose first
         * element is the argv NULL terminator. */
        if (argc < 3) {
            fprintf(stderr, "Usage: %s %s <target> [args]\n", cmd_args[0], subcmd);
            return 1;
        }
        if (is_run) {
            launch_container(cmd_args + 2);
        } else {
            attach_container(cmd_args + 2);
        }
        return 0;
    }

    if (strcmp(subcmd, "ps") == 0) {
        /* system() yields -1 or a non-zero wait status on failure;
         * surface that in our own exit code instead of reporting success. */
        if (system("ls containers") != 0) {
            fprintf(stderr, "ps: failed to list containers\n");
            return 1;
        }
        return 0;
    }

    fprintf(stderr, "Invalid command\n");
    return 1;
}
文件系统隔离
/* Writes "<dir>/<name>" into dst; returns 0 on success, -1 if it does not fit. */
static int fs_join_path(char *dst, size_t cap, const char *dir, const char *name) {
    int n = snprintf(dst, cap, "%s/%s", dir, name);
    return (n < 0 || (size_t)n >= cap) ? -1 : 0;
}

/*
 * Creates the per-container directories and mounts the overlay at
 * runtime/<id>. Returns 0 on success, -1 after printing a diagnostic.
 */
static int fs_mount_overlay(const char *image, const char *id) {
    char img_path[256], ctr_path[256], mnt_path[256];
    char mnt_opts[640];

    if (fs_join_path(ctr_path, sizeof ctr_path, "containers", id) != 0 ||
        fs_join_path(img_path, sizeof img_path, "images", image) != 0 ||
        fs_join_path(mnt_path, sizeof mnt_path, "runtime", id) != 0) {
        fprintf(stderr, "prepare_fs: path too long\n");
        return -1;
    }

    /* mkdir results are deliberately not checked: an already-existing
     * directory is fine, and a directory that is really missing makes the
     * mount() below fail, which is reported. */
    mkdir(ctr_path, 0755);
    /* NOTE(review): this work directory is shared by every container;
     * confirm against the overlayfs documentation whether concurrent
     * mounts may reuse one workdir. */
    mkdir("runtime/tmpwork", 0755);
    /* The mount target must exist before mounting onto it. */
    mkdir(mnt_path, 0755);

    int n = snprintf(mnt_opts, sizeof mnt_opts,
                     "lowerdir=%s,upperdir=%s,workdir=runtime/tmpwork",
                     img_path, ctr_path);
    if (n < 0 || (size_t)n >= sizeof mnt_opts) {
        fprintf(stderr, "prepare_fs: mount options too long\n");
        return -1;
    }

    /* Call mount(2) directly rather than building a shell command, so the
     * image name taken from the command line is never parsed by a shell. */
    if (mount("overlay", mnt_path, "overlay", 0, mnt_opts) != 0) {
        perror("prepare_fs: mount overlay");
        return -1;
    }
    return 0;
}

/*
 * Prepares the root filesystem for a new container.
 *
 * params[0] is read as the image name (looked up under images/). A fresh
 * container id is obtained from get_timestamp(); containers/<id> is used as
 * the overlay upper directory and the merged view is mounted at runtime/<id>.
 *
 * On return params[0] points at the container id. The id lives in a static
 * buffer so the pointer stays valid after this function returns; it is
 * overwritten by the next call. Failures are reported on stderr; the function
 * has no return value, so params[0] is replaced by the id even when the mount
 * could not be set up.
 */
void prepare_fs(char **params) {
    static char ts[20];

    if (params == NULL || params[0] == NULL) {
        fprintf(stderr, "prepare_fs: missing image name\n");
        return;
    }

    get_timestamp(ts);
    (void)fs_mount_overlay(params[0], ts);
    params[0] = ts;
}
OverlayFS实现分层存储:底层镜像只读,容器修改通过上层目录实现。文件操作遵循写时复制原则,确保基础镜像不变性。
进程隔离与资源限制
/* Entry point executed by the cloned child; defined further down in this file. */
int container_entry(void *params);

/* Size of the stack handed to the cloned child. */
enum { CONTAINER_STACK_SIZE = 1024 * 1024 };

/*
 * Runs the container command in a child created with new UTS, PID, mount,
 * network and IPC namespaces, then waits for it and unmounts its root.
 *
 * args[0] is the container id (its root is runtime/<id>); the remaining
 * elements are handed unchanged to container_entry(). While the child is
 * being created args[0] temporarily points at the root path; it is restored
 * before returning so the caller is not left with a pointer to a dead local.
 */
void create_isolated_process(char **args) {
    const int ns_flags = CLONE_NEWUTS | CLONE_NEWPID | CLONE_NEWNS |
                         CLONE_NEWNET | CLONE_NEWIPC;
    char *const ctr_id = args[0];
    char root_path[256];

    int n = snprintf(root_path, sizeof root_path, "runtime/%s", ctr_id);
    if (n < 0 || (size_t)n >= sizeof root_path) {
        fprintf(stderr, "create_isolated_process: container id too long\n");
        return;
    }

    char *stack = malloc(CONTAINER_STACK_SIZE);
    if (stack == NULL) {
        perror("create_isolated_process: malloc");
    } else {
        args[0] = root_path;
        /* clone() takes the top of the stack: stacks grow downwards on the
         * architectures Linux commonly runs on. CLONE_VM is not set, so the
         * child works on its own copy of args and root_path. */
        pid_t child_pid = clone(container_entry,
                                stack + CONTAINER_STACK_SIZE,
                                ns_flags | SIGCHLD,
                                args);
        args[0] = ctr_id;

        if (child_pid == -1) {
            perror("create_isolated_process: clone");
        } else {
            /* NOTE(review): the child starts running immediately, so these
             * host-side steps are not ordered against the child's own
             * network setup; confirm whether synchronisation is needed. */
            setup_cgroups(child_pid);
            config_network(child_pid);
            if (waitpid(child_pid, NULL, 0) == -1) {
                perror("create_isolated_process: waitpid");
            }
        }
        free(stack);
    }

    /* Always try to tear the container root down, even if no child ran. */
    if (umount(root_path) != 0) {
        perror("create_isolated_process: umount");
    }
}
容器初始化
/*
 * Entry point of the cloned child.
 *
 * params is a NULL-terminated char* array: element 0 is the directory to
 * use as the new root, element 1 the program to execute, and the elements
 * from 1 onwards form its argv.
 *
 * Does not return on success (the process image is replaced by execvp).
 * Returns 1 if isolation could not be established or the program could not
 * be executed.
 */
int container_entry(void *params) {
    char **args = params;

    /* Turn off mount propagation for the whole tree before mounting
     * anything, while "/" still names the full root rather than the
     * container root, so later mounts stay inside this mount namespace. */
    if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) != 0) {
        perror("container_entry: make / private");
        return 1;
    }

    /* Never continue on a failed chroot/chdir: the command would otherwise
     * run against the caller's filesystem. */
    if (chroot(args[0]) != 0) {
        perror("container_entry: chroot");
        return 1;
    }
    if (chdir("/") != 0) {
        perror("container_entry: chdir");
        return 1;
    }

    /* Best effort, as before: the root may not contain a /proc directory. */
    if (mount("proc", "/proc", "proc", MS_NOEXEC | MS_NOSUID | MS_NODEV, NULL) != 0) {
        perror("container_entry: mount /proc");
    }

    setenv("PATH", "/bin:/usr/bin", 1);
    setup_container_network();

    if (args[1] == NULL) {
        fprintf(stderr, "container_entry: no command given\n");
        return 1;
    }

    execvp(args[1], args + 1);
    /* Only reached when execvp failed. */
    perror("container_entry: execvp");
    return 1;
}
clone()创建具有独立命名空间的新进程;chroot()将进程的根目录切换到容器的文件系统,从而限制其文件系统视图;在新的PID命名空间内重新挂载proc,使/proc中只显示容器内的进程。
网络配置
/* Runs one shell command; returns 0 on success, -1 (after a diagnostic) otherwise. */
static int veth_run(const char *cmd) {
    int status = system(cmd);
    if (status != 0) {
        fprintf(stderr, "setup_veth: command failed (status %d): %s\n", status, cmd);
        return -1;
    }
    return 0;
}

/*
 * Host-side network setup for the container whose process id is pid:
 * creates the veth pair vhost-<pid>/vctr, moves vctr into the network
 * namespace of pid, and brings vhost-<pid> up.
 *
 * The steps depend on each other, so the sequence stops at the first
 * command that fails.
 */
void setup_veth(pid_t pid) {
    /* The longest command is well under 128 bytes even for an 11-character
     * integer, so none of the snprintf calls below can truncate. */
    char cmd[128];
    const int id = (int)pid; /* pid_t is not guaranteed to match %d */

    snprintf(cmd, sizeof cmd, "ip link add vhost-%d type veth peer name vctr", id);
    if (veth_run(cmd) != 0) {
        return;
    }

    snprintf(cmd, sizeof cmd, "ip link set vctr netns %d", id);
    if (veth_run(cmd) != 0) {
        return;
    }

    snprintf(cmd, sizeof cmd, "ip link set vhost-%d up", id);
    (void)veth_run(cmd);
}
/*
 * Network setup run from inside the container: brings up the loopback and
 * vctr interfaces, assigns 172.18.0.2/16 to vctr and installs a default
 * route via 172.18.0.1. Each command is run through the shell in order;
 * the results are not inspected.
 */
void setup_container_netif() {
    static const char *const commands[] = {
        "ip link set lo up",
        "ip link set vctr up",
        "ip addr add 172.18.0.2/16 dev vctr",
        "ip route add default via 172.18.0.1",
    };
    const size_t count = sizeof commands / sizeof commands[0];

    for (size_t i = 0; i < count; i++) {
        system(commands[i]);
    }
}
veth设备对连接容器与主机网络:一端(vhost-<pid>)留在主机,另一端(vctr)移入容器的网络命名空间。若需跨容器通信,还要把主机端接口接入网桥(上面的代码未包含这一步)。IP配置确保容器获得独立的网络标识。
资源管控
/*
 * Writes value to the file at path, truncating it first.
 * Returns 0 on success, -1 (after a diagnostic) on failure.
 */
static int cgroup_write(const char *path, const char *value) {
    FILE *f = fopen(path, "w");
    if (f == NULL) {
        perror(path);
        return -1;
    }

    int ok = fputs(value, f) != EOF;
    /* Buffered write errors may only show up when the stream is flushed. */
    if (fclose(f) == EOF) {
        ok = 0;
    }
    if (!ok) {
        fprintf(stderr, "limit_resources: write to %s failed\n", path);
        return -1;
    }
    return 0;
}

/*
 * Applies a CPU quota to process pid: creates the directory
 * /sys/fs/cgroup/cpu/ctr_<pid>, writes the pid to its "tasks" file and
 * then writes 20000 to its "cpu.cfs_quota_us" file.
 *
 * Failures are reported on stderr; if the pid cannot be added to the
 * group the quota is not written.
 */
void limit_resources(pid_t pid) {
    char dir[128];
    char file[160];
    char pid_line[32];
    const int id = (int)pid; /* pid_t is not guaranteed to match %d */

    snprintf(dir, sizeof dir, "/sys/fs/cgroup/cpu/ctr_%d", id);
    /* Result ignored on purpose: the directory may already exist, and a
     * directory that is really missing makes the fopen below fail. */
    mkdir(dir, 0755);

    /* Build each file name in its own buffer; formatting a buffer into
     * itself is undefined behaviour. */
    snprintf(file, sizeof file, "%s/tasks", dir);
    snprintf(pid_line, sizeof pid_line, "%d\n", id);
    if (cgroup_write(file, pid_line) != 0) {
        return;
    }

    snprintf(file, sizeof file, "%s/cpu.cfs_quota_us", dir);
    (void)cgroup_write(file, "20000");
}
cgroups通过文件系统接口实现CPU配额控制,限制容器资源使用上限。