提问人:RootPhoenix 提问时间:9/29/2023 最后编辑:RootPhoenix 更新时间:10/2/2023 访问量:135
即使客户端不发送数据,也会发生 UNIX 套接字读取
UNIX socket read happening even though client does not send data
问:
我面临一个非常奇怪的问题,我有一个简单的 UNIX STREAM 套接字服务器/客户端代码在 Linux 上运行。客户端偶尔会向服务器发送一条消息(我也测试了只发送一次),但是在收到第一条消息后,即使客户端没有发送任何消息,服务器也会继续打印相同的消息。
整个消息每次都是重新创建的,不存在可能导致同一消息在多次调用之间残留的静态数据等。
客户端代码:
/* Filesystem path of the UNIX-domain socket used to reach the server. */
#define SERVER_SOCK "/tmp/server_sock"

/* Client-side socket descriptor; a negative value means "not connected". */
int g_fd = -1;
int init_fd(void) {
g_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (g_fd < 0) {
log_info("alloc", "socket() failed with error (%d:%s)", errno, strerror(errno));
return -1;
}
struct sockaddr_un sa;
memset(&sa, 0, sizeof(sa));
sa.sun_family = AF_UNIX;
snprintf(sa.sun_path, sizeof(sa.sun_path), SERVER_SOCK);
if (connect(g_fd, (struct sockaddr *) &sa, strlen(sa.sun_path) + sizeof(sa.sun_family)) < 0) {
log_info("alloc", "connect() failed with error (%d:%s)", errno, strerror(errno));
return -1;
}
int flags = fcntl(g_fd, F_GETFL, 0);
fcntl(g_fd, F_SETFL, flags | O_NONBLOCK);
return 0;
}
/*
 * Build the JSON event {"msgType": 650, "data": {"type": "MESSAGE_CHANGE"}}
 * and send it to the server as one length-prefixed message.
 *
 * Connects first (via init_fd) when g_fd is negative. If the write fails the
 * socket is closed and g_fd is reset to -1, so the next call reconnects.
 * Nothing is sent if the message cannot be serialized.
 */
void send_event(void) {
    if (g_fd < 0 && init_fd() < 0) {
        log_info("alloc", "failed to connect to server");
        return;
    }

    json_t *jtc = json_object();
    json_object_set_new(jtc, "msgType", json_integer(650));
    json_t *jtype = json_object();
    json_object_set_new(jtype, "type", json_string("MESSAGE_CHANGE"));
    /* jtc takes over the reference to jtype; only jtc is released below. */
    json_object_set_new(jtc, "data", jtype);

    /* json_dumps() returns NULL on error (e.g. an allocation above failed);
     * passing that to strlen() would be undefined behavior. */
    char *j_dump_string = json_dumps(jtc, JSON_PRESERVE_ORDER);
    if (j_dump_string == NULL) {
        log_info("alloc", "failed to serialize message");
        json_decref(jtc);
        return;
    }

    /* +1: the terminating NUL is sent as part of the payload. */
    if (write_a_msg(g_fd, (uint8_t *)j_dump_string, strlen(j_dump_string) + 1) == -1) {
        close(g_fd);
        g_fd = -1;
        log_info("alloc", "failed to send message to server");
    }
    log_info("alloc", "GNA: da_send: %s", j_dump_string);

    free(j_dump_string);
    json_decref(jtc);
}
/*
 * Send one framed message: a 4-byte big-endian payload length followed by
 * nbytes of payload taken from ptr.
 *
 * fd     - descriptor to write to.
 * ptr    - payload bytes (may be NULL only when nbytes is 0).
 * nbytes - payload length; must fit in the 32-bit length header.
 *
 * Returns 0 on success, -1 on failure with errno set (EMSGSIZE when the
 * payload cannot be represented in the header, otherwise the errno of the
 * failing allocation or write).
 *
 * NOTE(review): assumes MSG_LEN_SIZE is 4, matching the four header bytes
 * written below - confirm against its definition.
 */
int write_a_msg(int fd, const uint8_t *ptr, size_t nbytes) {
    /* A longer payload would be silently truncated in the header, and the
     * buffer size computation below could wrap. */
    if (nbytes > UINT32_MAX || nbytes > SIZE_MAX - MSG_LEN_SIZE) {
        errno = EMSGSIZE;
        return -1;
    }

    uint8_t *write_buf = malloc(nbytes + MSG_LEN_SIZE);
    if (!write_buf)
        return -1;

    write_buf[0] = (uint8_t)(nbytes >> 24);
    write_buf[1] = (uint8_t)(nbytes >> 16);
    write_buf[2] = (uint8_t)(nbytes >> 8);
    write_buf[3] = (uint8_t)(nbytes);
    if (nbytes > 0)
        memcpy(write_buf + MSG_LEN_SIZE, ptr, nbytes);

    int rc = (write_loop(fd, write_buf, nbytes + MSG_LEN_SIZE) < 0) ? -1 : 0;

    /* free() may modify errno; keep the value the write left behind. */
    int save_err = errno;
    free(write_buf);
    errno = save_err;
    return rc;
}
/*
 * Write exactly nbytes from ptr to fd, retrying after short writes.
 *
 * EINTR is retried instead of being reported as a failure. EAGAIN /
 * EWOULDBLOCK are retried immediately, so on a non-blocking descriptor this
 * spins until the peer drains its buffer.
 * NOTE(review): consider waiting for writability (poll/select) instead of
 * spinning.
 *
 * Returns 0 when everything was written, -1 on error with errno set. A write
 * that makes no progress (returns 0) is reported as EIO rather than being
 * judged by whatever errno happened to hold from an earlier call.
 */
static int write_loop(int fd, const uint8_t *ptr, size_t nbytes) {
    size_t remaining = nbytes;

    while (remaining > 0) {
        ssize_t nwritten = write(fd, ptr, remaining);
        if (nwritten < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nwritten == 0) {
            errno = EIO;
            return -1;
        }
        remaining -= (size_t)nwritten;
        ptr += nwritten;
    }
    return 0;
}
服务器代码(客户端使用 libev 处理套接字,服务器使用 select):
/*
 * Create the listening UNIX stream socket at SERVER_SOCK and register it
 * with the event loop so connection_callback runs on read events.
 *
 * Any existing file at SERVER_SOCK is unlinked before binding.
 * Returns RET_OK on success, -1 on failure (the socket is closed again).
 */
static int server_init(void) {
    int listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_fd < 0) {
        DebugLog(ERROR, "Could not create socket - error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    unlink(SERVER_SOCK);

    struct sockaddr_un addr;
    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    strcpy(addr.sun_path, SERVER_SOCK);

    if (bind(listen_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        DebugLog(ERROR, "Bind failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    if (listen(listen_fd, 5) != 0) {
        DebugLog(ERROR, "Listen failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    DebugLog(INFO, "GNA: Create server ready to accept\n");
    ev_add_fd(listen_fd, EV_READ, connection_callback, NULL);
    return RET_OK;

fail:
    /* Shared cleanup for failures after the socket was created. */
    close(listen_fd);
    return -1;
}
/*
 * Event-loop callback for the listening socket: accept one pending
 * connection on fd and register the new descriptor so request_callback
 * handles its read events. An accept failure is logged and otherwise ignored.
 */
static void connection_callback(int fd, int flags, void *data) {
    struct sockaddr_un peer_addr;
    socklen_t peer_len = sizeof(peer_addr);

    int client_fd = accept(fd, (struct sockaddr *) &peer_addr, &peer_len);
    if (client_fd >= 0) {
        DebugLog(INFO, "GNA: received new connection\n");
        ev_add_fd(client_fd, EV_READ, request_callback, NULL);
        return;
    }

    DebugLog(ERROR, "%s:Accept failed", __func__);
}
/*
 * Event-loop callback for a connected client: read one framed message from
 * fd, NUL-terminate it and pass it to handle_msg.
 *
 * If the read fails (or yields no buffer) the descriptor is removed from the
 * event loop and closed.
 */
static void request_callback(int fd, int flags, void *data) {
    (void)flags;
    (void)data;

    /* Use the type read_a_msg expects instead of type-punning a char * object. */
    uint8_t *buf = NULL;
    size_t msglen = 0;

    /* Also treat a missing buffer as failure: writing the terminator below
     * through a NULL pointer would crash. */
    if (read_a_msg(fd, &buf, &msglen) < 0 || buf == NULL) {
        DebugLog(ERROR, "%s:read failed (pid:%d). error %d:%s", __func__, getpid(), errno,
                 strerror(errno));
        ev_del_fd(fd);
        close(fd); /* drop this connection; the peer has to reconnect */
        free(buf);
        return;
    }

    char *msg = (char *)buf;
    /* read_a_msg allocates one byte beyond msglen for this terminator. */
    msg[msglen] = '\0';
    handle_msg(msg, msglen, fd);
    free(msg);
}
/*
 * Handle one received message; currently this only logs it.
 *
 * msg is printed with %s, so it must be NUL-terminated. msglen and fd are
 * not used here.
 */
static void handle_msg(char *msg, int msglen, int fd) {
    (void)msglen;
    (void)fd;

    DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
}
/*
 * Read one framed message from fd: a 4-byte big-endian length followed by
 * that many payload bytes.
 *
 * On success returns 0, stores a malloc'd buffer in *ptr (the caller frees
 * it) and the payload length in *nbytes. The buffer holds one extra byte
 * beyond *nbytes so callers reading strings can append a NUL terminator.
 *
 * On failure returns -1 with *ptr == NULL and *nbytes == 0; nothing is left
 * for the caller to free.
 *
 * NOTE(review): the length comes from the peer and is not capped, so a bogus
 * header can request an allocation of up to 4 GiB - consider a protocol limit.
 */
int read_a_msg(int fd, uint8_t **ptr, size_t *nbytes) {
    uint8_t hd[4];

    *ptr = NULL;
    *nbytes = 0;

    if (read_loop(fd, hd, 4) < 0)
        return -1;

    /* Widen to uint32_t before shifting: hd[0] << 24 on a promoted int is
     * undefined for bytes >= 0x80 and would sign-extend into size_t. */
    uint32_t wire_len = ((uint32_t)hd[0] << 24) | ((uint32_t)hd[1] << 16) |
                        ((uint32_t)hd[2] << 8) | (uint32_t)hd[3];
    size_t payload_len = wire_len;

    /* Only reachable where size_t is 32 bits: payload_len + 1 would wrap to 0. */
    if (payload_len > SIZE_MAX - 1) {
        errno = EMSGSIZE;
        return -1;
    }

    uint8_t *buf = malloc(payload_len + 1);
    if (buf == NULL)
        return -1;

    if (read_loop(fd, buf, payload_len) < 0) {
        /* free() may modify errno; keep the value the read left behind. */
        int save_err = errno;
        free(buf);
        errno = save_err;
        return -1;
    }

    *ptr = buf;
    *nbytes = payload_len;
    return 0;
}
/*
 * Read exactly nbytes from fd into ptr, retrying after short reads.
 *
 * EINTR is retried instead of being reported as a failure. EAGAIN /
 * EWOULDBLOCK are retried immediately, so on a non-blocking descriptor this
 * spins until the remaining bytes arrive.
 * NOTE(review): consider waiting for readability (poll/select) instead of
 * spinning.
 *
 * Returns 0 when all nbytes were read, -1 otherwise with errno set. End of
 * file before nbytes were read is reported as EIO, so callers that log errno
 * do not print a stale value from an unrelated earlier call.
 */
static int read_loop(int fd, uint8_t *ptr, size_t nbytes) {
    size_t remaining = nbytes;

    while (remaining > 0) {
        ssize_t nread = read(fd, ptr, remaining);
        if (nread < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nread == 0) {
            /* Peer closed the stream before the full amount arrived. */
            errno = EIO;
            return -1;
        }
        remaining -= (size_t)nread;
        ptr += nread;
    }
    return 0;
}
日志每隔几秒钟就会打印一次,尽管没有人发送任何内容。
DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
在客户端,只有一个没有其他人调用的发送函数,并且此日志只出现一次。
log_info("alloc", "GNA: da_send: %s", j_dump_string);
谁能建议为什么会发生这种情况。以及如何避免?
更新1:按照建议在客户端和服务器上运行 strace 之后,我发现服务器的 select 返回时该 fd 处于就绪状态,并且会定期收到读取事件:
08:19:26.031031 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=16, tv_usec=1049544}) = 1 (in [11], left {tv_sec=17, tv_usec=44972})
08:19:26.035737 read(11, "\0\0\0005", 4) = 4
08:19:26.035814 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:38.982049 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=3, tv_usec=1000564}) = 1 (in [11], left {tv_sec=3, tv_usec=946001})
08:19:39.036745 read(11, "\0\0\0005", 4) = 4
08:19:39.036816 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:50.117012 _newselect(18, [4 5 6 7 8 9 10 11 15 16 17], [], NULL, {tv_sec=4, tv_usec=1276752}) = 1 (in [17], left {tv_sec=5, tv_usec=276586})
08:19:50.117251 read(17, "\0\0\0005", 4) = 4
08:19:50.117308 read(17, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:51.910855 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=2, tv_usec=1070595}) = 1 (in [11], left {tv_sec=2, tv_usec=943831})
08:19:52.037758 read(11, "\0\0\0005", 4) = 4
08:19:52.037841 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:20:05.031834 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=0, tv_usec=1000335}) = 1 (in [11], left {tv_sec=0, tv_usec=993545})
08:20:05.038758 read(11, "\0\0\0005", 4) = 4
08:20:05.038838 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
我杀掉了客户端,并从它启动时起就用 strace 进行跟踪,以便得到套接字的 fd。除了下面这一次之外,客户端没有在 fd 14 上写入任何内容:
08:10:02.362615 socket(AF_UNIX, SOCK_STREAM, 0) = 14
08:10:02.363091 connect(14, {sa_family=AF_UNIX, sun_path="/tmp/server_sock"}, 21) = 0
08:10:02.363251 fcntl64(14, F_GETFL) = 0x2 (flags O_RDWR)
08:10:02.363297 fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
08:10:02.363837 write(14, "\0\0\0005{\"msgType\": 650, \"data\": {\"t"..., 57) = 57
这是 fd 14 的唯一写入。
更新2:上述函数 send_event 被编译为 libabc.so 库的一部分。在我的消息中加入 getpid() 和 gettimeofday() 后,我发现另一个守护程序也在使用这个库,并调用了一个会调用 send_event 的包装函数,这就是为什么即使客户端没有发送任何消息,服务器也会定期收到消息。我现在创建了一个 .pid 文件来存储客户端的 pid,然后在 send_event 中将当前进程的 pid 与客户端的 pid 进行比较。
答:
上述函数 send_event 被编译为 libabc.so 库的一部分。在我的消息中加入 getpid() 和 gettimeofday() 后,我发现另一个守护程序也在使用这个库,并调用了一个会调用 send_event 的包装函数,这就是为什么即使客户端没有发送任何消息,服务器也会定期收到消息。我现在创建了一个存储客户端 pid 的 .pid 文件,然后在 send_event 中将当前进程的 pid 与客户端的 pid 进行比较。
感谢@pts的建议。
评论