无法使用 C 将数据提取从 Selenium 发送到 Excel#

Can't send data extract from Selenium to Excel using C#

提问人:olamundo97 提问时间:11/15/2023 更新时间:11/15/2023 访问量:43

问:

我正在尝试使用从网站中提取的项目列表中的数据(有关公司名称、职位名称等的信息)创建一个 excel 文件。因此,我正在使用 OPENXML 库将此数据放入 Excel,但它不起作用。我得到的只是一个空的 excel 文件,里面有任何东西。问题是什么??

using JobWebScraper.Model;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using OpenQA.Selenium.Support.UI;
using SeleniumExtras.WaitHelpers;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Threading.Tasks;


namespace JobWebScraper.Driver
{
    public class AutomationWeb
    {
        public IWebDriver driver;

        public AutomationWeb()
        {
            var options = new ChromeOptions();
            options.AddArgument("no-sandbox");
            driver = new ChromeDriver(); //vai abrir o navegador do Chrome
        }
         
        public DataTable TestWeb()
        {

            var dataTable = new DataTable();

            var items = new List<Item>();

            driver.Navigate().GoToUrl("https://br.indeed.com/");

            // Prompt user for input
            Console.WriteLine("Bem vindo ao JobSearcher");
            Console.WriteLine("Digite qual área você procura: ");
            string userInput = Console.ReadLine();

            Console.WriteLine("Trabalho remoto? S/N");
            string remoteInput = Console.ReadLine();


            // Use an explicit wait for the search input field to be clickable and then send keys
            WebDriverWait wait = new WebDriverWait(driver, TimeSpan.FromSeconds(10));

            // Wait for the search input field to be clickable and then send keys
            IWebElement searchInput = wait.Until(ExpectedConditions.ElementToBeClickable(By.XPath("//*[@id=\"text-input-what\"]")));

            // Send keys using the user input
            searchInput.SendKeys(userInput);
            searchInput.SendKeys(Keys.Enter);


            // remote work

            // Check if the answer for "Trabalho remoto? S/N" is 'S'
            if (remoteInput.ToUpper() == "S")
            {
                IWebElement locationInput = wait.Until(ExpectedConditions.ElementToBeClickable(By.Id("text-input-where")));

                // Clear any existing text using the "Delete" key
                locationInput.SendKeys(Keys.End); // Move the cursor to the end of the text
                locationInput.SendKeys(Keys.Shift + Keys.Home); // Select the entire text
                locationInput.SendKeys(Keys.Delete); // Delete the selected text


                locationInput.SendKeys("remoto");
                locationInput.SendKeys(Keys.Enter);

                // Locate the all the jobs
                IWebElement allJobs = driver.FindElement(By.XPath("//*[@id=\"mosaic-provider-jobcards\"]/ul"));

                // Get all li elements within the ul
                var eachJob = allJobs.FindElements(By.TagName("li"));

                IWebElement closeGooglePopup = null;
                IWebElement closePopup = null;

                try
                {
                    closeGooglePopup = wait.Until(ExpectedConditions.ElementToBeClickable(By.XPath("//*[@id=\"google-Only-Modal\"]/div/div[1]/button")));
                    closePopup = wait.Until(ExpectedConditions.ElementToBeClickable(By.XPath("//*[@id=\"mosaic-desktopserpjapopup\"]/div[1]/button")));
                }
                catch (NoSuchElementException)
                {
                    Console.WriteLine("Element not found or clickable");
                }

                if (closePopup != null)
                {
                    closeGooglePopup.Click();
                    closePopup.Click();
                }


                // Create DataTable columns
                dataTable.Columns.Add("Nome da Empresa");
                dataTable.Columns.Add("Titulo");
                dataTable.Columns.Add("Data");
                dataTable.Columns.Add("Link");


                foreach (var job in eachJob)
                {
                    var item = new Item();

                    foreach (var titleElement in job.FindElements(By.CssSelector("span[title]")))
                    {
                        // Access the title attribute for each element
                        string title = titleElement.GetAttribute("title");

                        // Add the title to the list
                        item.Titles.Add(title);


                        var companyNameElement = titleElement.FindElement(By.XPath("ancestor::li/descendant::span[@data-testid='company-name']"));

                        var publishedElement = titleElement.FindElement(By.XPath("ancestor::li/descendant::span[@class='date']"));

                        // Localizar todas as tags <a> com a classe 'jcs-JobTitle'
                        var joblinkElements = job.FindElements(By.CssSelector("a.jcs-JobTitle"));

                        Console.WriteLine($"Vaga: {title}, Empresa: {companyNameElement.Text}, Data: {publishedElement.Text}");


                        foreach (var joblinkElement in joblinkElements)
                        {
                            // Obter o valor do atributo href
                            string jobLink = joblinkElement.GetAttribute("href");

                            item.JobLinks.Add(jobLink);

                            Console.WriteLine($"- {jobLink}");

                        }


                        // Add the item to the items list
                        items.Add(new Item
                        {
                            CompanyName = companyNameElement.Text,
                            PublishDate = publishedElement.Text,
                        });
                    }

                }

                foreach (var item in items)
                {
                    dataTable.Rows.Add(item.CompanyName, item.PublishDate, item.Titles, item.JobLinks);
                }


                return dataTable;

            }
            else
            {
                Console.WriteLine("Trabalho presencial? S/N");
                Console.ReadLine();
                Console.WriteLine("Onde?");
                return null; // Adding a return statement for non-'S' inputs

            }


        }

    }
}

using System;
using System.Data;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using JobWebScraper.Driver;
using JobWebScraper.Model;
using OpenXmlCell = DocumentFormat.OpenXml.Spreadsheet.Cell;
using OpenXmlRow = DocumentFormat.OpenXml.Spreadsheet.Row;



namespace JobWebScraper
{
    class Program
    {
        static void Main(string[] args)
        {
            var web = new AutomationWeb();
            var dataTable = web.TestWeb();

            if (dataTable != null)
            {
                CreateExcel(dataTable);
            }
        }


        public static void CreateExcel(System.Data.DataTable dataTable)
        {
            var fileName = "VagasParaAlegrarSeuDia.xlsx";

            // Create a spreadsheet document
            using (var document = SpreadsheetDocument.Create(fileName, SpreadsheetDocumentType.Workbook))
            {
                WorkbookPart workbookPart = document.AddWorkbookPart();
                workbookPart.Workbook = new Workbook();

                var worksheetPart = workbookPart.AddNewPart<WorksheetPart>();
                worksheetPart.Worksheet = new Worksheet(new SheetData());

                var sheets = workbookPart.Workbook.AppendChild(new Sheets());
                sheets.AppendChild(new Sheet
                {
                    Id = workbookPart.GetIdOfPart(worksheetPart),
                    SheetId = 1,
                    Name = "Jobs Data"
                });

                var sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();

                // Add headers
                var headerRow = new Row();

                foreach (DataColumn column in dataTable.Columns)
                {
                    headerRow.AppendChild(new Cell(new CellValue(column.ColumnName)));
                }

                sheetData.AppendChild(headerRow);

                // Add data
                foreach (DataRow dataRow in dataTable.Rows)
                {
                    var newRow = new Row();

                    foreach (var value in dataRow.ItemArray)
                    {
                        // Construct the cell
                        var cell = new Cell(new CellValue(value.ToString()));
                        newRow.AppendChild(cell);
                    }

                    sheetData.AppendChild(newRow);
                }

                // Save changes
                workbookPart.Workbook.Save();
            }

            Console.WriteLine("Excel file created successfully.");
        }


    }
}

C# asp.net Excel Selenium-WebDriver

评论

0赞 Emanuele 11/15/2023
也没有新床单吗?也可以尝试使用“worksheetPart.Worksheet.Save()”
0赞 olamundo97 11/15/2023
@Emanuele没有。我尝试了您的解决方案,但仍然不起作用:/
0赞 olamundo97 11/15/2023
我在这样的行中尝试了 Console.WriteLine“ Console.WriteLine(”Row data: “ + string.Join(“, ”, dataRow.ItemArray));它在 jobLink 中给出了这个错误:System.Collections.Generic.List'1[System.String]
0赞 pcalkins 11/16/2023
我自己更喜欢 NPOI,但对于 OpenXML,我会检查此处的示例: learn.microsoft.com/en-us/dotnet/api/... (他们首先使用参数和值定义单元格......然后将其追加到新行,然后将该行追加到工作表中。您将迭代以将行和单元格追加到工作表中...迭代并创建整个工作表后。将其设置为 WorksheetPart...
1赞 olamundo97 11/16/2023
哦,我最后一次尝试使用OpenXML,它起作用了哈哈。当我添加工作表时。Append(sheetData) 它起作用了

答: 暂无答案