如何在 puppeteer chrome 上启用多个文件下载

How to enable multiple file downloading on puppeteer chrome

提问人:jC61 提问时间:11/17/2023 更新时间:11/17/2023 访问量:10

问:

我想使用puppeteer下载文件,有时如果一个文件下载延迟,我会收到这种通知

[下载多个文件通知](https://i.stack.imgur.com/rKIoB.png)

当然,我可以手动单击[允许],但我希望 puppeteer 启动的测试浏览器在打开时就已经启用“允许下载多个文件”的设置。

请让我知道我怎么能解决这个问题

这是我目前的完整源代码。如果您有任何解决方案来更新此代码,请建议我。谢谢

const puppeteer = require('puppeteer-extra');
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
const fs = require('fs');

// Configure ONE stealth plugin instance and register that same instance.
// Calling `puppeteer.use(StealthPlugin())` would register a second, fresh
// instance, so the evasion removed below would still be active in the browser.
const stealth = StealthPlugin();
// NOTE(review): presumably disabled because the script reads the PDF viewer
// iframe via contentFrame() — confirm this evasion is really the one that interferes.
stealth.enabledEvasions.delete("iframe.contentWindow");
puppeteer.use(stealth);

/**
 * Suspends the caller for a fixed amount of time.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} Settles (with no value) once the timer has fired.
 */
async function delay(ms) {
    await new Promise((wake) => {
        setTimeout(wake, ms);
    });
}

/**
 * Moves/renames a file on disk.
 *
 * @param {string} oldPath - Current location of the file.
 * @param {string} newPath - Desired location of the file.
 * @returns {Promise<void>} Fulfils with no value on success; rejects with the
 *     underlying filesystem error (e.g. code "ENOENT") on failure.
 */
const renameFile = async (oldPath, newPath) => {
    await fs.promises.rename(oldPath, newPath);
};

/**
 * Collects the indexing details of the document currently shown in the viewer.
 *
 * Executed inside the page through `page.evaluate`, so it must stay
 * self-contained and may only use browser globals.
 *
 * @returns {Array<string|undefined>} In order: documentType, documentNumber,
 *     recordingDate, numberPages, documentDate, grantees, grantors, legal
 *     remarks. The first five are `undefined` when their element is missing;
 *     the last three are ", "-joined strings (empty when nothing matched).
 */
function scrapeDocumentDetails() {
    const textOf = (selector) => document.querySelector(selector)?.innerText;

    const documentType = textOf('#documentIndexingInformation ul li.ui-li-static.ui-body-inherit');
    const documentNumber = textOf('.doc-viewer tr:nth-child(1) td div:nth-child(3)');
    const recordingDate = textOf('.doc-viewer tr:nth-child(3) td div:nth-child(3)');
    const numberPages = textOf('.doc-viewer tr:nth-child(4) td div:nth-child(3)');
    const documentDate = textOf('.doc-viewer tr:nth-child(5) td div:nth-child(3)');

    // Index 2 is read for grantees/grantors and index 3 for legal remarks.
    const sections = document.querySelectorAll('li.ui-li-static.ui-body-inherit.ui-last-child');

    const granteesElement = sections[2];
    const grantee = Array.from(
        granteesElement?.querySelectorAll("div ul li") ?? [],
        (elem) => elem.innerText
    );

    // The grantor cell holds either a <ul> of names or a single plain-text name.
    const grantorElement = granteesElement?.querySelector("tr:nth-child(2) td div:nth-child(2)");
    let grantor = [];
    if (grantorElement?.querySelector("ul")) {
        grantor = Array.from(grantorElement.querySelectorAll("ul li"), (elem) => elem.innerText);
    } else if (grantorElement) {
        grantor = [grantorElement.innerText];
    }

    const legalsElement = sections[3];
    const legalElements = legalsElement?.querySelectorAll("tr td div:nth-child(2) ul li");
    const legal = [];
    if (legalElements?.length > 0) {
        for (const elem of legalElements) {
            const legalText = elem.innerText;
            if (legalText.includes('Legal Remarks:')) {
                legal.push(legalText);
            }
        }
    } else if (legalsElement) {
        const legalText = legalsElement.querySelector("tr td div:nth-child(2)")?.innerText;
        // NOTE(review): 'Documnet Remarks:' looks like a typo for 'Document Remarks:'.
        // Left unchanged because the site's real label is not visible here — confirm.
        if (legalText && !legalText.includes('Documnet Remarks:') && legalText.includes('Legal Remarks:')) {
            legal.push(legalText);
        }
    }

    return [
        documentType,
        documentNumber,
        recordingDate,
        numberPages,
        documentDate,
        grantee.join(', '),
        grantor.join(', '),
        legal.join(', '),
    ];
}

/**
 * Starts a download by clicking a temporary hidden <a download> element, then
 * waits `downloadDelay` ms before removing the element and resolving.
 *
 * Executed inside the viewer frame through `frame.evaluate`. The original code
 * called `resolve()` without a surrounding Promise, which threw a
 * ReferenceError in the page and made `evaluate` return immediately; returning
 * a Promise makes the caller actually wait for the delay.
 *
 * @param {string} link - Absolute URL of the file to download.
 * @param {string} downloadedFileName - Suggested name for the saved file.
 * @param {number} downloadDelay - Milliseconds to wait after the click.
 * @returns {Promise<void>} Resolves after the delay has elapsed.
 */
function triggerDownloadInFrame(link, downloadedFileName, downloadDelay) {
    return new Promise((resolve) => {
        const a = document.createElement("a");
        a.href = link;
        a.download = downloadedFileName;
        a.style.display = "none";
        document.body.appendChild(a);
        a.click();
        setTimeout(() => {
            document.body.removeChild(a);
            // Logged to the browser console; the wait is time-based, the
            // download itself is not verified.
            console.log(link, "successfully downloaded!");
            resolve();
        }, downloadDelay);
    });
}

/**
 * Script entry point: runs a date-range search on the clerk site, then walks
 * through the results one document at a time, logging each document's details
 * and triggering a download of its PDF.
 */
(async () => {
    const path = require('path');

    const BASE_URL = "https://clerk.clevelandcountyok.com";
    const FIRST_RESULT_ROW = "li.ss-search-row.ui-li-static.ui-body-inherit.ui-first-child";
    const DOWNLOAD_DIR = path.resolve(process.cwd(), "downloads");
    const DOWNLOAD_SETTLE_MS = 20000;

    const browser = await puppeteer.launch({
        headless: false,
        args: ["--start-maximized"],
        timeout: 6000000,
        protocolTimeout: 6000000,
        defaultViewport: null,
    });

    try {
        // Allow downloads without user interaction and send them to DOWNLOAD_DIR.
        // NOTE(review): this is the commonly used way to avoid Chrome's
        // "download multiple files" prompt under automation; CDP requires a
        // downloadPath with behavior "allow", so files now land in ./downloads
        // instead of the browser default — confirm this is acceptable.
        fs.mkdirSync(DOWNLOAD_DIR, { recursive: true });
        const cdp = await browser.target().createCDPSession();
        await cdp.send("Browser.setDownloadBehavior", {
            behavior: "allow",
            downloadPath: DOWNLOAD_DIR,
        });

        const page = await browser.newPage();
        await page.goto(`${BASE_URL}/web/search/DOCSEARCH25S5`);
        await page.waitForSelector("#submitDisclaimerAccept");
        await page.click("#submitDisclaimerAccept");

        await page.waitForSelector("#field_RecDateID_DOT_StartDate");
        await page.type("#field_RecDateID_DOT_StartDate", "01/01/1995");

        await page.waitForSelector("#field_RecDateID_DOT_EndDate");
        await page.type("#field_RecDateID_DOT_EndDate", "01/03/1995");

        await page.waitForSelector("#searchButton");
        await page.click("#searchButton");

        // waitForSelector resolves to the element handle, so no second
        // (possibly null) page.$() lookup is needed.
        const firstElement = await page.waitForSelector(FIRST_RESULT_ROW);

        // Number parsed from the text after "for " in the second result header;
        // it bounds the loop below (one iteration per document viewed).
        const pageNum = await page.$$eval('.selfServiceSearchResultHeaderLeft', (divs) => {
            const text = divs[1]?.innerText.split('for ')[1]; // Extract text after 'for'
            return Number.parseInt(text, 10);
        });
        console.log(pageNum);
        if (Number.isNaN(pageNum)) {
            throw new Error("Could not read the result count from the search results header");
        }

        // Open the first result, then its document viewer.
        await firstElement.click();
        await delay(2000);
        const viewPDF = await page.waitForSelector(
            "p.selfServiceSearchFullResult.selfServiceSearchResultNavigation"
        );
        await viewPDF.click();

        for (let i = 0; i < pageNum; i++) {
            console.log(i);
            try {
                await page.waitForSelector('#documentIndexingInformation ul li.ui-li-static.ui-body-inherit');

                await delay(2000);
                const elementHandle = await page.waitForSelector('iframe.ss-pdfjs-lviewer');
                const frame = await elementHandle.contentFrame();
                await frame.waitForSelector('#numPages');

                const buttonSelector = 'button#printCustom';
                await page.waitForSelector(buttonSelector);
                const buttonHref = await page.$eval(
                    buttonSelector,
                    (button) => button.getAttribute('data-href')
                );
                console.log(buttonHref);

                const data = await page.evaluate(scrapeDocumentDetails);
                data.push(buttonHref);
                console.log(data);

                // Download pdfs part
                const pdfID = data[1];
                console.log(pdfID);
                if (buttonHref) {
                    const pdfURL = new URL(buttonHref, BASE_URL).href;
                    await frame.evaluate(
                        triggerDownloadInFrame,
                        pdfURL,
                        `cleveland_${data[0]}_${pdfID}.pdf`,
                        DOWNLOAD_SETTLE_MS
                    );
                } else {
                    // Without a data-href there is nothing to download; still
                    // advance to the next document below.
                    console.warn("No data-href on", buttonSelector, "- skipping download");
                }

                const clickable = await page.$$(
                    "a.ui-link.ui-btn.ui-btn-b.ui-btn-inline.ui-shadow.ui-corner-all"
                );

                // The second matching link is clicked to move to the next document.
                if (clickable.length > 1) {
                    await delay(1000); // Added delay before click to allow element to be ready
                    await clickable[1].click();
                }
            } catch (err) {
                console.error("An error occurred during an iteration:", err);
            }
        }
    } finally {
        // Always release the browser, even when setup or navigation fails.
        await browser.close();
    }
})().catch((err) => {
    console.error("Scraping run failed:", err);
    process.exitCode = 1;
});
node.js google-chrome web-scraping puppeteer webautomation

评论


答: 暂无答案