如何使用curl同时下载多个文件?

How to download multiple files at the same time with curl?

提问人:Jabu 提问时间:8/26/2023 最后编辑:273KJabu 更新时间:8/26/2023 访问量:82

问:

我正在使用我的私有令牌从我的 GitHub 存储库下载多个文件,我已经能够让它工作,但一个接一个地下载它们。

我想问一下如何在不使用线程的情况下并行下载多个文件。我可以使用任何版本的 curl。

#include <curl/curl.h>

// Write callback: appends the delivered chunk to the std::string passed as
// `userp`, then wipes the chunk in the source buffer.
//
// contents: start of the delivered bytes.
// size, nmemb: their product is the number of bytes delivered.
// userp: pointer to the std::string that accumulates the data.
// Returns the number of bytes consumed (always size * nmemb).
static size_t WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp)
{
    const size_t byteCount = size * nmemb;
    char* const chunk = static_cast<char*>(contents);

    std::string* const sink = static_cast<std::string*>(userp);
    sink->append(chunk, byteCount);

    // Scrub the bytes from the source buffer now that they have been copied.
    SecureZeroMemory(chunk, byteCount);
    return byteCount;
}

void curl(std::string& data, const std::string_view& url)
{
    CURL* curl_handle;
    CURLcode res;
    struct curl_slist* slist{};

    curl_handle = curl_easy_init();

    curl_easy_setopt(curl_handle, CURLOPT_URL, url.data());
    //curl_easy_setopt(curl_handle, CURLOPT_TCP_KEEPALIVE, 0);

    slist = curl_slist_append(slist, gitToken.data());
    slist = curl_slist_append(slist, "Accept: application/vnd.github.v3.raw");
    curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, slist);

    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "curl/7.55.1");

    // Necessary for downloading file from github repo.
    curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
   
    curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, 1L); // only to debug

    res = curl_easy_perform(curl_handle);
    OutputDebugStringA(curl_easy_strerror(res));

    if(res != CURLE_OK)
       std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror(res) << '\n';

    curl_easy_cleanup(curl_handle);
    curl_global_cleanup();
}

// Fetches every URL in the list sequentially, one curl() call per entry.
int main()
{
    std::vector<std::string> urlList =
    {
        "...",
        "...",
        // ...
    };

    // Each entry gets its own fresh buffer for curl() to fill.
    for (const std::string& address : urlList)
    {
        std::string data;
        curl(data, address);
        // save data to disk ...
    }
}
C++ GitHub libcurl

评论


答:

1赞 vengy 8/26/2023 #1

curl_multi API 由 libcurl 提供,无需线程即可支持并行传输。

#include <curl/curl.h>

#include <cstring>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

// Write callback: appends the delivered chunk to the std::string passed as
// `userp`, then zeroes the chunk in the source buffer.
//
// contents: start of the delivered bytes.
// size, nmemb: their product is the number of bytes delivered.
// userp: pointer to the std::string that accumulates the data.
// Returns the number of bytes consumed (always size * nmemb).
static size_t WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp)
{
    const size_t byteCount = size * nmemb;
    char* const chunk = static_cast<char*>(contents);

    std::string* const sink = static_cast<std::string*>(userp);
    sink->append(chunk, byteCount);

    // Scrub the bytes from the source buffer now that they have been copied.
    memset(chunk, 0, byteCount);
    return byteCount;
}

// Transfer-progress callback: prints the download percentage for one URL.
//
// ptr:     pointer to the std::string holding the URL to show in the message.
// dltotal: total number of bytes expected for the download.
// dlnow:   number of bytes downloaded so far.
// ultotal, ulnow: upload counters; not used here.
// Always returns 0, so this callback never asks for the transfer to be aborted.
static int progress_func(void *ptr, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow)
{
    // Nothing is printed while the total is not positive; this also keeps
    // the division below away from zero.
    if (dltotal <= 0)
        return 0;

    const std::string* const label = static_cast<std::string*>(ptr);
    const double percent = (double(dlnow) / double(dltotal)) * 100;
    std::cout << "URL: " << *label << ", Progress: " << percent << "%" << std::endl;
    return 0;
}

void setup_curl(CURL* curl_handle, std::string& data, const std::string& url, const std::string_view& gitToken)
{
    struct curl_slist* slist = nullptr;

    curl_easy_setopt(curl_handle, CURLOPT_URL, url.c_str());
    slist = curl_slist_append(slist, gitToken.data());
    slist = curl_slist_append(slist, "Accept: application/vnd.github.v3.raw");
    curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, slist);

    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "curl/7.55.1");
    curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, 1L);

    curl_easy_setopt(curl_handle, CURLOPT_XFERINFOFUNCTION, progress_func);
    curl_easy_setopt(curl_handle, CURLOPT_XFERINFODATA, &url);
    curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0L);
}

int main()
{
    std::vector<std::string> urlList =
    {
        "http://example.com/file1",
        "http://example.com/file2",
        // ...
    };

    CURLM* multi_handle;
    int still_running;

    curl_global_init(CURL_GLOBAL_DEFAULT);
    multi_handle = curl_multi_init();

    std::vector<CURL*> curl_handles;
    std::vector<std::string> dataVec(urlList.size());

    for (size_t i = 0; i < urlList.size(); ++i)
    {
        CURL* curl_handle = curl_easy_init();
        setup_curl(curl_handle, dataVec[i], urlList[i], "YOUR_GITHUB_TOKEN_HERE");
        curl_handles.push_back(curl_handle);
        curl_multi_add_handle(multi_handle, curl_handle);
    }

    curl_multi_perform(multi_handle, &still_running);

    while (still_running)
    {
        struct timeval timeout;
        int rc;

        fd_set fdread;
        fd_set fdwrite;
        fd_set fdexcep;
        int maxfd;

        timeout.tv_sec = 1;
        timeout.tv_usec = 0;

        FD_ZERO(&fdread);
        FD_ZERO(&fdwrite);
        FD_ZERO(&fdexcep);

        curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);
        rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

        switch (rc)
        {
            case -1:
                break;
            case 0:
            default:
                curl_multi_perform(multi_handle, &still_running);
                break;
        }
    }

    for (CURL* handle : curl_handles)
    {
        curl_multi_remove_handle(multi_handle, handle);
        curl_easy_cleanup(handle);
    }
    curl_multi_cleanup(multi_handle);
    curl_global_cleanup();

    // Now dataVec contains downloaded data for each URL. You can save it to disk or use as required.

}

评论

0赞 Jabu 8/26/2023
谢谢!您知道如何获取所有文件的下载进度吗?对于单个文件,我使用的是:curl_easy_setopt(curl_handle, CURLOPT_XFERINFOFUNCTION, downloadProgress_callback); curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0); 但是在这种情况下,我不明白如何让它适用于所有文件。
0赞 vengy 8/26/2023
select 函数是 Windows Winsock 的一部分,因此您需要包含头文件 #include <WinSock2.h> 并链接 Ws2_32.lib 库。此外,添加进度条是使用 CURLOPT_PROGRESSFUNCTION 完成的。
0赞 Jabu 8/26/2023
我知道您提到的 api,我的意思是在这种情况下如何调整它以根据所有文件进行计算。
0赞 vengy 8/26/2023
更新了示例以包含 URL 下载进度 %。
0赞 Jabu 8/26/2023
如何衡量所有文件的下载进度?我需要为每个文件一个回调吗?