提问人:wang larry 提问时间:6/30/2023 更新时间:6/30/2023 访问量:23
在块设备上运行 FIO/Vdbench 时,是否可以使用 ptrace 来模拟特定 lba 范围内的 IO 错误
is it possible to use the ptrace to simulate the IO error on the specific lba range when running the FIO/Vdbench on the block device
问:
作为标题,我正在寻找一种方法来模拟使用其他 IO 工具运行工作负载(通过 FIO/Vdbench)时的 IO 错误或慢磁盘问题,我尝试使用 ptrace 通过以下代码来做到这一点
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/blkpg.h>
#include <sys/mman.h>
#include <signal.h>
#include <sys/wait.h>
#include <dirent.h>
#include <sys/user.h>
#include <sys/ptrace.h>
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
#define IO_ERROR_RANGE_START 0x1000000
#define IO_ERROR_RANGE_END 0x2000000
int pid_for_device(char *device)
{
char cmd[256];
snprintf(cmd, sizeof(cmd), "lsof %s", device);
FILE *fp = popen(cmd, "r");
if (fp == NULL)
{
perror("popen");
return -1;
}
char buf[256];
int pid = -1;
while (fgets(buf, sizeof(buf), fp) != NULL)
{
if (strstr(buf, "r") != NULL)
{
char *ptr = strtok(buf, " ");
ptr = strtok(NULL, " ");
pid = atoi(ptr);
break;
}
}
pclose(fp);
return pid;
}
void simulate_io_error(pid_t pid, unsigned long addr, unsigned long count)
{
struct user_regs_struct regs;
if (ptrace(PTRACE_GETREGS, pid, NULL, ®s) == -1)
{
perror("ptrace(PTRACE_GETREGS) failed");
exit(1);
}
if (addr >= IO_ERROR_RANGE_START && addr + count <= IO_ERROR_RANGE_END)
{
printf("Simulating I/O error for range %lu-%lu\n", addr, addr + count);
regs.rax = -EIO;
}
if (ptrace(PTRACE_SETREGS, pid, NULL, ®s) == -1)
{
perror("ptrace(PTRACE_SETREGS) failed");
exit(1);
}
}
int main(int argc, char *argv[])
{
if (argc != 5)
{
printf("Usage: %s <device file> <start range in GB> <end range in GB> <duration in seconds>\n", argv[0]);
return 1;
}
char *device_file = argv[1];
long start_range_gb = strtol(argv[2], NULL, 10);
long end_range_gb = strtol(argv[3], NULL, 10);
int duration = atoi(argv[4]);
int fd = open(device_file, O_RDWR);
if (fd == -1)
{
perror("open failed");
return 1;
}
unsigned long start_range_lba = start_range_gb * (1024 * 1024 * 1024 / 512);
unsigned long end_range_lba = end_range_gb * (1024 * 1024 * 1024 / 512);
printf("Simulating I/O errors in range %lu-%lu for %d seconds\n", start_range_lba, end_range_lba, duration);
unsigned long range_size = (end_range_lba - start_range_lba) * 512;
unsigned long range_start = start_range_lba * 512;
void *mapped = mmap(NULL, range_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, range_start);
if (mapped == MAP_FAILED)
{
perror("mmap failed");
return 1;
}
pid_t pid = pid_for_device(device_file);
if (pid == -1)
{
perror("get_pid_from_lsof failed");
return 1;
}
int i = 0;
for (; i < duration; i++)
{
unsigned long addr = (unsigned long)mapped + (i % (range_size / PAGE_SIZE)) * PAGE_SIZE;
unsigned long count = PAGE_SIZE;
simulate_io_error(pid, addr, count);
usleep(1000);
}
munmap(mapped, range_size);
close(fd);
return 0;
}
程序将首先使用命令获取进程的 PID
lsofof /dev/sda
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
fio 76954 root 3r BLK 134,144 0x3b8ee000 1035083385 /dev/sda
然后它尝试使用 ptrace 连接 PID 76954,但是,我的程序总是以错误 ptrace(PTRACE_GETREGS) 失败告终:即使进程已准备就绪并正在运行,也没有这样的进程,我的代码中有什么问题吗?
巴西 拉里
答: 暂无答案
评论