使用隧道的 HTTPS 代理-解网

问：

我是网络新手。我正在尝试构建一个在代理上运行的程序。但是，这仅适用于基于 HTTP 的请求。我正在尝试为HTTPS实现它。

我遇到了隧道的概念，其中通过代理服务器在客户端和目标服务器之间创建隧道。但是，我仍然不清楚，当客户端向代理发送HTTPS数据包时，代理服务器如何知道将数据包转发到哪个IP？

我确实听说过客户端与代理之间、以及代理与目标服务器之间最初的 TCP 握手，但这一步真的能发生吗？代理对目标服务器一无所知；而且由于数据包是加密的，代理无法像处理 HTTP 请求那样读取 `Host` 标头字段。

有人可以向我澄清一下吗？

顺便说一句，我在 StackOverflow 上看到了多篇关于 HTTP 隧道的文章和答案，但它们都只是要求在客户端使用 HTTP `CONNECT` 方法来建立隧道。但是我只能控制代理服务器，无法控制客户端发送给我的内容。

此外，我希望在 C 中从头开始实现这个隧道。

以下是我的HTTP代理代码实现，以防万一：

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>

#define PROXY_PORT 8080
#define BLACKLISTED_URL "example.com"

/* Port used when the Host header does not name one explicitly. */
#define DEFAULT_HTTP_PORT "80"

/*
 * Write exactly `len` bytes from `buf` to socket `fd`.
 *
 * send() may accept fewer bytes than requested, so keep calling it until
 * everything has been handed to the kernel.
 *
 * Returns 0 on success, -1 if send() fails or makes no progress.
 */
static int send_all(int fd, const char *buf, size_t len) {
    while (len > 0) {
        ssize_t sent = send(fd, buf, len, 0);
        if (sent <= 0) {
            return -1;
        }
        buf += sent;
        len -= (size_t)sent;
    }
    return 0;
}

/*
 * Send a minimal plain-text HTTP/1.1 response to `fd`.
 *
 * `status` is the status code and reason phrase (e.g. "403 Forbidden");
 * `body` is the NUL-terminated payload. Content-Length is computed from the
 * body so that it can never disagree with what is actually sent.
 */
static void send_simple_response(int fd, const char *status, const char *body) {
    char response[512];
    int len = snprintf(response, sizeof(response),
                       "HTTP/1.1 %s\r\n"
                       "Content-Type: text/plain\r\n"
                       "Content-Length: %zu\r\n"
                       "Connection: close\r\n"
                       "\r\n"
                       "%s",
                       status, strlen(body), body);
    if (len < 0 || (size_t)len >= sizeof(response)) {
        fprintf(stderr, "Response for status '%s' does not fit the buffer\n", status);
        return;
    }
    if (send_all(fd, response, (size_t)len) < 0) {
        perror("Error sending response to the client");
    }
}

/*
 * Extract the target host and port from the first "Host:" header found in
 * the NUL-terminated `request`.
 *
 * The header value may be "host" or "host:port"; when the port is absent or
 * empty, DEFAULT_HTTP_PORT is used. Optional whitespace around the value is
 * ignored. Bracketed IPv6 literals are not supported and are rejected.
 *
 * Returns 0 and fills `host`/`port` (both NUL-terminated) on success,
 * -1 if the header is missing, empty, too long, or carries an invalid port.
 */
static int parse_host_header(const char *request,
                             char *host, size_t host_size,
                             char *port, size_t port_size) {
    const char *value = strstr(request, "Host:");
    if (value == NULL) {
        return -1;
    }
    value += strlen("Host:");
    while (*value == ' ' || *value == '\t') {
        value++;
    }

    /* The value ends at the end of the line (or of the buffer). */
    size_t value_len = strcspn(value, "\r\n");
    while (value_len > 0 &&
           (value[value_len - 1] == ' ' || value[value_len - 1] == '\t')) {
        value_len--;
    }

    const char *colon = memchr(value, ':', value_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - value) : value_len;
    if (host_len == 0 || host_len >= host_size) {
        return -1;
    }
    memcpy(host, value, host_len);
    host[host_len] = '\0';

    size_t port_len = (colon != NULL) ? value_len - host_len - 1 : 0;
    if (port_len == 0) {
        snprintf(port, port_size, "%s", DEFAULT_HTTP_PORT);
        return 0;
    }
    if (port_len >= port_size) {
        return -1;
    }

    /* Accept only a decimal port in 1..65535. */
    unsigned long port_value = 0;
    for (size_t i = 0; i < port_len; i++) {
        char c = colon[1 + i];
        if (c < '0' || c > '9') {
            return -1;
        }
        port_value = port_value * 10 + (unsigned long)(c - '0');
    }
    if (port_value == 0 || port_value > 65535) {
        return -1;
    }
    memcpy(port, colon + 1, port_len);
    port[port_len] = '\0';
    return 0;
}

/*
 * Serve one plain-HTTP proxy request on `client_socket`.
 *
 * Reads the request, rejects it with 403 if it mentions BLACKLISTED_URL,
 * otherwise connects to the server named in the Host header, forwards the
 * request and streams the response back until the server closes.
 *
 * This function owns `client_socket`: it is closed on every path, together
 * with any socket or address list opened along the way.
 *
 * NOTE: only the bytes returned by the first recv() are parsed and
 * forwarded, so requests larger than the buffer (or whose body arrives in a
 * later segment) are truncated. CONNECT tunnelling is not implemented.
 */
void handle_client(int client_socket) {
    enum { HOST_MAX = 256, PORT_MAX = 6 };
    char request[4096];
    char response_buffer[4096];
    char host[HOST_MAX];
    char port[PORT_MAX];
    struct addrinfo hints = {0};
    struct addrinfo *addr = NULL;
    int target_socket = -1;
    ssize_t bytes_received;
    int ret;

    /* Leave room for the terminator: the parsing below uses string functions. */
    bytes_received = recv(client_socket, request, sizeof(request) - 1, 0);
    if (bytes_received < 0) {
        perror("Error receiving request from the client");
        goto cleanup;
    }
    if (bytes_received == 0) {
        printf("Client closed the connection without sending a request\n");
        goto cleanup;
    }
    request[bytes_received] = '\0';

    printf("Received request from client: %s\n", request);

    /* Check the blacklist first so blocked requests never trigger a DNS lookup. */
    if (strstr(request, BLACKLISTED_URL) != NULL) {
        printf("URL blocked: %s\n", BLACKLISTED_URL);
        send_simple_response(client_socket, "403 Forbidden",
                             "Access Denied: URL blocked\r\n");
        goto cleanup;
    }

    if (parse_host_header(request, host, sizeof(host), port, sizeof(port)) != 0) {
        printf("No usable Host field found in the request\n");
        send_simple_response(client_socket, "400 Bad Request",
                             "Missing or invalid Host header\r\n");
        goto cleanup;
    }
    printf("Extracted host: %s, port: %s\n", host, port);

    /* getaddrinfo() accepts both dotted-quad addresses and host names. */
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;

    ret = getaddrinfo(host, port, &hints, &addr);
    if (ret != 0) {
        fprintf(stderr, "Error resolving %s: %s\n", host, gai_strerror(ret));
        send_simple_response(client_socket, "502 Bad Gateway",
                             "Could not resolve the target host\r\n");
        goto cleanup;
    }

    /* Try every returned address until one accepts the connection. */
    for (struct addrinfo *ai = addr; ai != NULL; ai = ai->ai_next) {
        target_socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (target_socket < 0) {
            perror("Error creating the target socket");
            continue;
        }
        if (connect(target_socket, ai->ai_addr, ai->ai_addrlen) == 0) {
            break;
        }
        perror("Error connecting to the target server");
        close(target_socket);
        target_socket = -1;
    }
    if (target_socket < 0) {
        send_simple_response(client_socket, "502 Bad Gateway",
                             "Could not connect to the target server\r\n");
        goto cleanup;
    }
    printf("Connected to the target server\n");

    if (send_all(target_socket, request, (size_t)bytes_received) < 0) {
        perror("Error forwarding request to the target server");
        goto cleanup;
    }
    printf("Request forwarded to the target server\n");

    /* Relay the response until the target closes its side. */
    for (;;) {
        bytes_received = recv(target_socket, response_buffer, sizeof(response_buffer), 0);
        if (bytes_received < 0) {
            perror("Error receiving response from the target server");
            break;
        }
        if (bytes_received == 0) {
            printf("Response forwarded to the client\n");
            break;
        }
        if (send_all(client_socket, response_buffer, (size_t)bytes_received) < 0) {
            perror("Error sending response to the client");
            break;
        }
    }

cleanup:
    if (addr != NULL) {
        freeaddrinfo(addr);
    }
    if (target_socket >= 0) {
        close(target_socket);
    }
    close(client_socket);
}

/*
 * Entry point: listen on 127.0.0.1:PROXY_PORT and serve clients one at a
 * time, passing each accepted connection to handle_client().
 *
 * Returns 1 if the listening socket cannot be set up; otherwise it loops
 * forever and does not return.
 */
int main(void) {
    /*
     * Writing to a connection the peer has already closed raises SIGPIPE,
     * whose default action terminates the process. Ignore it so that send()
     * reports the failure through its return value instead.
     */
    signal(SIGPIPE, SIG_IGN);

    int proxy_socket = socket(AF_INET, SOCK_STREAM, 0);
    if (proxy_socket < 0) {
        perror("Error creating the proxy socket");
        return 1;
    }

    /* Allow an immediate restart while old connections sit in TIME_WAIT. */
    int reuse = 1;
    if (setsockopt(proxy_socket, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        perror("Error setting SO_REUSEADDR on the proxy socket");
    }

    struct sockaddr_in proxy_addr;
    memset(&proxy_addr, 0, sizeof(proxy_addr));
    proxy_addr.sin_family = AF_INET;
    proxy_addr.sin_port = htons(PROXY_PORT);
    proxy_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); /* 127.0.0.1 */

    if (bind(proxy_socket, (struct sockaddr*)&proxy_addr, sizeof(proxy_addr)) < 0) {
        perror("Error binding to the proxy port");
        close(proxy_socket);
        return 1;
    }

    if (listen(proxy_socket, 10) < 0) {
        perror("Error listening on the proxy socket");
        close(proxy_socket);
        return 1;
    }

    printf("Proxy server listening on port %d...\n", PROXY_PORT);

    for (;;) {
        struct sockaddr_in client_addr;
        socklen_t client_addr_len = sizeof(client_addr);
        int client_socket = accept(proxy_socket, (struct sockaddr*)&client_addr, &client_addr_len);

        /* client_addr is only filled in when accept() succeeds. */
        if (client_socket < 0) {
            perror("Error accepting client connection");
            continue;
        }

        printf("Connection accepted from %s:%d\n", inet_ntoa(client_addr.sin_addr), ntohs(client_addr.sin_port));
        printf("Handling client request...\n");
        handle_client(client_socket);
        printf("Done handling client request\n");
    }

    /* Not reached: the accept loop above never terminates. */
}

C 套接字 HTTPS 代理 HTTP 隧道

是的。在这种情况下，客户端首先创建与代理的 TCP 连接，然后向代理发送一个指明目标的隧道请求；代理随后创建与目标的 TCP 连接，如果成功，代理就在这两个 TCP 连接之间来回传递所有后续的原始字节，直到其中一方断开连接，然后代理再断开与另一方的连接。

代理对目标服务器一无所知

不，代理是知道的，因为客户端会预先告诉代理目标是什么。这就是大多数代理协议（例如 `CONNECT`、`SOCKS` 等）的工作方式。

由于数据包是加密的，代理无法像处理 HTTP 请求那样读取 `Host` 标头字段。

对于非加密的 HTTP 代理，代理本身充当 HTTP 服务器，客户端直接向该服务器发送 HTTP 消息，就好像它是目标服务器一样。客户端必须在每条 HTTP 消息中指定目标服务器，要么在 HTTP 请求行中使用绝对 URL，要么在 HTTP `Host` 标头中给出。然后，代理可以读取该目标并将 HTTP 消息转发到指定的服务器。在这种情况下，不需要持久的 TCP 隧道，因为 HTTP 的各条消息之间是无状态的。

但是，代理加密的 HTTPS 并非如此。代理根本无法解析 HTTPS 消息，因为它没有加密详细信息。它所能做的就是在客户端和目标之间创建一条隧道，然后通过隧道在它们之间传递原始字节。客户端和目标之间通过隧道协商加密详细信息，然后通过隧道在它们之间交换任何加密数据消息。就代理而言，隧道只是携带不透明的数据。

顺便说一句，我在 StackOverflow 上看到了多篇关于 HTTP 隧道的文章和答案，但它们都只是要求在客户端使用 HTTP `CONNECT` 方法来建立隧道。但是我只能控制代理服务器，无法控制客户端发送给我的内容。

代理决定它实现的代理协议 - HTTP、CONNECT、SOCKS 等。然后，客户端必须遵循该协议，以便通过该协议来回传递应用程序数据。

谢谢@Remy的回答。所以基本上如果客户端在设置中配置了使用代理，它会在TCP握手时自行发送一个CONNECT方法数据包，我作为代理可以监听和读取，然后使用这个CONNECT数据包中的Host URL，我可以在客户端和主机之间构建一个隧道。我的解释正确吗？

0赞 Remy Lebeau 11/14/2023

@MT16 如果客户端配置为通过 `CONNECT` 代理使用 HTTP/S，那么是的。正如我在回答中提到的，还有其他类型的代理可用。此外，`CONNECT` 请求中没有 URL，只有 host:port。您是否阅读过 `CONNECT` 方法的任何文档？

上一个：使用 https.request 的 POST 调用返回 408

下一个：如何修复 Ecommerse 混合内容 Https 错误 [已关闭]

使用隧道的 HTTPS 代理

HTTPS Proxy using Tunnelling

评论

评论