提问人:Shah Zeb 提问时间:10/22/2023 更新时间:10/22/2023 访问量:52
为什么 Xpath 在 Selenium 中没有产生正确数量的元素?
Why is Xpath not yielding correct number of elements in Selenium?
问:
我正在尝试使用 Selenium 和 Xpath 从网站上抓取数据,但我遇到了一个奇怪的问题。
网站链接:dexcheck
预期结果: 查看该页面时,Xpath 应返回 16 个“已实现的 ROI %”数据点。
实际结果: Xpath 有时只给出 11 个甚至更少的结果。
观察:
此问题并非每次都出现:有时有效,有时无效。有趣的是,如果我将浏览器视图缩小到 25%,在使用 Chrome DevTools 时问题似乎就消失了。但是,在使用 Selenium 时,即使浏览器以 25% 的缩放比例启动,也无法复现同样的效果。
我已确保向下滚动以加载所有元素。这是我的滚动机制:
def scroll_to_load(driver, container_xpath):
    """Page down repeatedly until the page's HTML stops changing.

    A paragraph inside the PnL table is clicked first (presumably so the
    key presses reach the table -- TODO confirm). PAGE_DOWN is then sent
    in batches of 16, with a 5 second pause after each batch, until
    ``driver.page_source`` is identical before and after a batch.

    Args:
        driver: Selenium WebDriver controlling the page.
        container_xpath: Not used by this function; the click target is
            hard-coded. Kept so existing callers keep working.
    """
    try:
        inside_table = driver.find_element(
            By.XPATH, '((//div[@class="crypto-pnl-table"]/div)[3]/div/p)[1]'
        )
        inside_table.click()
    except Exception:
        # Best effort: scrolling is still attempted when the element is
        # missing or not clickable. ``Exception`` (rather than a bare
        # ``except``) lets Ctrl-C and interpreter exit propagate.
        pass

    while True:
        old_page = driver.page_source
        actions = ActionChains(driver)
        for _ in range(16):
            actions.send_keys(Keys.PAGE_DOWN).perform()
        # Give lazily loaded rows time to render before comparing.
        time.sleep(5)
        new_page = driver.page_source
        if new_page == old_page:
            print('new page == old page')
            break
这个 Xpath 和方法在过去运行良好,但在最近的更新后失效了。我不确定是网站结构发生了变化,还是我遗漏了什么。
完整的代码在下面!
import time
import pandas as pd
from scrapy import Selector
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from seleniumbase import Driver
def scroll_to_load(driver, container_xpath):
    """Keep paging down until a scroll batch no longer changes the HTML.

    Args:
        driver: Selenium WebDriver controlling the page.
        container_xpath: Accepted but not used; the element that gets
            clicked is identified by a fixed XPath below.
    """
    focus_xpath = '((//div[@class="crypto-pnl-table"]/div)[3]/div/p)[1]'
    try:
        driver.find_element(By.XPATH, focus_xpath).click()
    except Exception:
        # Clicking is optional; carry on scrolling regardless.
        pass

    page_unchanged = False
    while not page_unchanged:
        snapshot = driver.page_source
        chain = ActionChains(driver)
        for _ in range(16):
            chain.send_keys(Keys.PAGE_DOWN).perform()
        # Pause so newly requested content can be added to the DOM.
        time.sleep(5)
        page_unchanged = driver.page_source == snapshot
def get_driver():
    """Start a SeleniumBase driver in ``uc`` mode and maximize its window.

    Returns:
        The driver object returned by ``Driver(uc=True)``.
    """
    # A selenium ``Options`` object (user agent, window size, headless,
    # image-blocking prefs) used to be assembled here, but it was never
    # handed to ``Driver``, so none of those settings had any effect.
    # The dead code was removed so this function shows what really runs.
    # Any such setting has to be supplied to ``Driver`` itself.
    driver = Driver(uc=True)
    driver.maximize_window()
    return driver
def exporter(row):
    """Append one row to ``DexCheck.csv``, emitting the header only once.

    The function attribute ``exporter.switch`` records whether the header
    still has to be written: it starts as ``True`` and is set to ``False``
    after the first call, so later calls append data lines only.

    Args:
        row: Mapping of column name to a scalar value for a single row.
    """
    file_name = 'DexCheck.csv'
    write_header = bool(exporter.switch)
    pd.DataFrame(row, index=[0]).to_csv(
        file_name, index=False, mode='a', header=write_header
    )
    # The header belongs at the top of the file only; never repeat it.
    exporter.switch = False


# True until the first row (and with it the header) has been written.
exporter.switch = True
def scraper(address, driver):
    """Scrape the 30-, 7- and 1-day views for one wallet and export a row.

    The figures from all three timeframes are merged into one dict, keyed
    with the timeframe as a suffix, and passed to ``exporter`` once.

    Args:
        address: Wallet address inserted into the analyzer URL.
        driver: Selenium WebDriver used to load and scroll each page.
    """
    combined = {'Wallet Address': address}
    table_xpath = '//div[@class="crypto-pnl-table"]'

    for timeframe in (30, 7, 1):
        driver.get(f'https://dexcheck.ai/app/address-analyzer/{address}?chain=eth&timeframe={timeframe}')
        # The 30-day view gets a longer fixed wait than the shorter ones.
        if timeframe == 30:
            settle_seconds = 25
        else:
            settle_seconds = 15
        time.sleep(settle_seconds)

        scroll_to_load(driver, table_xpath)
        page = Selector(text=driver.page_source)
        combined.update(ScrapeData(page, timeframe))

    exporter(combined)
def ScrapeData(response, x):
    """Extract the summary figures for one timeframe from a parsed page.

    Args:
        response: Object whose ``xpath(query)`` result offers ``get()`` and
            ``getall()`` (a scrapy ``Selector`` in this file).
        x: Timeframe identifier; only used as a suffix for the result keys.

    Returns:
        A dict with the keys ``PNL Total <x>``, ``Trading Volume Total <x>``,
        ``Total Trades <x>`` and ``Average ROI <x>``. The average is the
        mean of the extracted ROI values as a float, or ``'N/A'`` when no
        values were found or one of them is not a number.
    """
    pnl_total = response.xpath('//div/p[contains(text(),"PNL")]/span/text()').get()
    trading_volume_parts = response.xpath(
        '//div/p[contains(text(),"Trading Volume(")]/span/text()'
    ).getall()
    trading_volume_total = ''.join(trading_volume_parts)
    total_trades = response.xpath('//div/p[contains(text(),"Total Trades(")]/span/text()').get()

    roi_texts = response.xpath(
        '(((//div[@class="py-0.5"]/div/p)[position() mod 3=2])/text())[position() mod 2=1]'
    ).getall()
    try:
        roi_values = [
            float(text.replace('$', '').replace(',', '').replace('%', ''))
            for text in roi_texts
        ]
    except ValueError:
        # A placeholder such as "-" cannot be averaged. Report "N/A" rather
        # than aborting the run or averaging only part of the table.
        roi_values = []
    average_roi = sum(roi_values) / len(roi_values) if roi_values else 'N/A'

    # Any timeframe works as a key suffix, not just 30, 7 and 1.
    prefix = str(x)
    return {
        f'PNL Total {prefix}': pnl_total,
        f'Trading Volume Total {prefix}': trading_volume_total,
        f'Total Trades {prefix}': total_trades,
        f'Average ROI {prefix}': average_roi,
    }
if __name__ == "__main__":
    # Scrape every wallet listed in the ``address`` column of
    # ./walletAddress.csv with one shared browser session.
    driver = get_driver()
    try:
        addresses = pd.read_csv('./walletAddress.csv')['address'].tolist()
        for address in addresses:
            scraper(address, driver)
    finally:
        # quit() ends the whole WebDriver session, whereas close() only
        # closes the current window. The ``finally`` makes sure the
        # browser is shut down even when a scrape raises.
        driver.quit()
关于这个问题的任何指导将不胜感激。提前致谢!
答: 暂无答案
评论
window.screen.height: 1620
options.add_argument("window-size=2880x1620")