Asked by: Игорь Кудряшов  Asked: 1/17/2023  Last edited by: Игорь Кудряшов  Updated: 1/18/2023  Views: 155
Why can't Selenium parse the site, and why does it throw an error?
Q:
I wrote the parsing code with bs4, but then I had to rework it for Selenium. When the code runs, chromedriver opens, but then it closes and shows an error. When parsing starts, chromedriver opens, a lot of words ("word word word word ...") are printed, and in the end only a link appears in the console.
# aiogram
from aiogram import types
from aiogram.types.message import ParseMode
from bot import dp
from bot import db
from aiogram.utils.markdown import hbold, hlink
import cfscrape
import fake_headers
# python
import re
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium_stealth import stealth
from selenium.webdriver.common.by import By
import time
# defaults for mvc
city = "belgorod"
city2 = "voronezh"
city3 = "kursk"
radius = 300
allowed_data = ['часов', 'часа','час']
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)
@dp.message_handler(lambda message: message.text == "Воронеж")
async def avito_list(message: types.Message):
    try:
        # Check whether the user has any subscriptions at all
        if db.follows_exists(message.from_user.id):
            # Send out every listing that matches the subscriptions
            follows = db.show_subs(message.from_user.id)
            for follow in follows:
                line = follow[1][0][0]
                with db.connection:
                    marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
                    marka = marka[0][0]
                min_price = follow[2][0][0]
                if follow[3][0][0] == None:
                    max_price = follow[2][0][0]*1000
                else:
                    max_price = follow[3][0][0]
                #model = "2114_samara"
                #url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
                url = f"https://www.avito.ru/valuyki/avtomobili/{marka}-ASgBAgICAUTgtg3GmSg?cd=1&radius=200"
                driver.get(url)
                time.sleep(5)
                print(url)
                main_container = driver.find_elements(By.CSS_SELECTOR,".iva-item-content")
                for index, content in enumerate(main_container):
                    contaier_of_content = content.find_element(By.CSS_SELECTOR, ".iva-item-body")
                    ad_post = contaier_of_content.find_element(By.CLASS_NAME, "item-line")
                    #fresh_car = contaier_of_content.find_element(By.CLASS_NAME,{"data-marker":"item-date"}).text.split()[1]
                    price = content.find_element(By.CSS_SELECTOR, ".price-price").find_element(By.CLASS_NAME, "price").get_attribute("content")
                    if int(price) <= max_price and int(price) >= min_price:  # check that the listing is from today and that the price fits the range
                        title_info = contaier_of_content.find_element(By.CLASS_NAME,'.iva-item-title').get_attribute("title").split(',')
                        town_info = contaier_of_content.find_element(By.CLASS_NAME,'.geo-root').find_element(By.TAG_NAME,"span").text
                        datePost_info = contaier_of_content.find_element(By.CLASS_NAME,'.iva-item-dateInfo').find_element(By.TAG_NAME,"div").text
                        #link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
                        currency = content.find_element(By.CSS_SELECTOR,'.price-price').find_element(By.TAG_NAME,'meta').get_attribute("content")
                        car_info = title_info[0]
                        was_created = title_info[1]
                        city_on_sale = town_info
                        card = f'{hlink(car_info+" - "+was_created,"https://avito.ru"+link)}\n' \
                               f'{hbold("Город: ", city_on_sale)}\n' \
                               f'{hbold("Цена: ", price, currency)}'
                        await message.answer(card)
    except Exception as ex:
        print(ex)
    finally:
        driver.close()
        driver.quit()
This is what appears in the console:
https://www.avito.ru/valuyki/avtomobili/volkswagen-ASgBAgICAUTgtg3GmSg?cd=1&radius=200
https://www.avito.ru/valuyki/avtomobili/vaz_lada-ASgBAgICAUTgtg3GmSg?cd=1&radius=200
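A note on the locator calls in the question's code, which passes values like '.iva-item-title' to By.CLASS_NAME: in Selenium, By.CLASS_NAME expects a bare class name, while By.CSS_SELECTOR expects a CSS selector, which is where the leading dot belongs. A minimal sketch of that difference follows; the Avito class names and the URL are only illustrative assumptions, not verified against the live page markup.

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://www.avito.ru/valuyki/avtomobili")  # path taken from the question, for illustration only

# By.CLASS_NAME takes a bare class name; a value with a leading dot such as ".iva-item-content" will not match
items_by_class = driver.find_elements(By.CLASS_NAME, "iva-item-content")

# By.CSS_SELECTOR takes a CSS selector; a substring match is used here because Avito-style class names often carry dynamic suffixes
items_by_css = driver.find_elements(By.CSS_SELECTOR, "[class*='iva-item-content']")

print(len(items_by_class), len(items_by_css))
driver.quit()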
A:
0 votes
Игорь Кудряшов
1/18/2023
#1
I was able to solve the problem by combining the work of bs4 and Selenium; thanks to that, the parser started working.
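The core of that combination: Selenium loads and renders the page, and BeautifulSoup then parses driver.page_source with the old bs4 logic. A stripped-down sketch of the pattern is below; the URL is a placeholder and the class-name regexes are the ones used in the full code that follows, so they are only assumptions about the page markup.

import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://www.avito.ru/valuyki/avtomobili")  # placeholder URL; the real search URL is built per subscription below
time.sleep(5)  # give the page time to render before grabbing its HTML

# Hand the rendered HTML to BeautifulSoup and keep parsing with bs4 as before
soup = BeautifulSoup(driver.page_source, 'lxml')
for content in soup.find_all('div', class_=re.compile('iva-item-content*')):
    title = content.find('a', class_=re.compile('iva-item-title*'))
    link = content.find('a', class_=re.compile('iva-item-sliderLink*'))
    if title and link:
        print(title['title'], 'https://avito.ru' + link['href'])

driver.quit()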
# aiogram
from aiogram import types
from aiogram.types.message import ParseMode
from selenium.webdriver.chrome.service import Service
from bot import dp
from bot import db
from aiogram.utils.markdown import hbold, hlink
import cfscrape
import fake_headers
# python
import re
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium_stealth import stealth
from selenium.webdriver.common.by import By
import time
# defaults for mvc
city = "belgorod"
city2 = "voronezh"
city3 = "kursk"
radius = 300
allowed_data = ['часов', 'часа','час']
options = webdriver.ChromeOptions()
options.add_argument("headless")
options.add_argument("--no-sandbox")
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)
#def get_session(url):
#session = requests.Session()
#session.headers = {
# 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
# 'Accept-Language':'ru,en-US;q=0.5',
# }
#return cfscrape.create_scraper(sess=session)
@dp.message_handler(lambda message: message.text == "Белгород")
async def avito_list(message: types.Message):
    try:
        # Check whether the user has any subscriptions at all
        if db.follows_exists(message.from_user.id):
            # Send out every listing that matches the subscriptions
            follows = db.show_subs(message.from_user.id)
            for follow in follows:
                line = follow[1][0][0]
                with db.connection:
                    marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
                    marka = marka[0][0]
                min_price = follow[2][0][0]
                if follow[3][0][0] == None:
                    max_price = follow[2][0][0]*1000
                else:
                    max_price = follow[3][0][0]
                #model = "2114_samara"
                #url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
                url = f"your url"
                driver.get(url)
                source_data = driver.page_source
                soup = BeautifulSoup(source_data, 'lxml')
                time.sleep(5)
                print('Начинаю собирать информацию')  # "Starting to collect information"
                main_container = soup.find_all('div', class_=re.compile('iva-item-content*'))
                for index, content in enumerate(main_container):
                    contaier_of_content = content.find("div", class_=re.compile("iva-item-body*"))
                    ad_post = contaier_of_content.find("div", {"data-marker": "item-line"})
                    fresh_car = contaier_of_content.find("div", {"data-marker": "item-date"}).text.split()[1]
                    price = content.find('span', class_=re.compile('price-price-*')).find('meta', itemprop="price")['content']
                    if int(price) <= max_price and int(price) >= min_price:  # check that the listing is from today and that the price fits the range
                        title_info = contaier_of_content.find('a', class_=re.compile('iva-item-title*'))['title'].split(',')
                        town_info = contaier_of_content.find('div', class_=re.compile('geo-root*')).find('span').text
                        datePost_info = contaier_of_content.find('div', class_=re.compile('iva-item-dateInfo*')).find('div').text
                        link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
                        currency = content.find('span', class_=re.compile('price-price-*')).find('meta')['content']
                        car_info = title_info[0]
                        was_created = title_info[1]
                        city_on_sale = town_info
                        card = f'{hlink(car_info + " - " + was_created, "https://avito.ru" + link)}\n' \
                               f'{hbold("Город: ", city_on_sale)}\n' \
                               f'{hbold("Цена: ", price, currency)}'
                        await message.answer(card)
    except Exception as ex:
        print(ex)
    finally:
        print('Миссия выполнена, сэр!')  # "Mission accomplished, sir!"
@dp.message_handler(lambda message: message.text == "Воронеж")
async def avito_list(message: types.Message):
    try:
        # Check whether the user has any subscriptions at all
        if db.follows_exists(message.from_user.id):
            # Send out every listing that matches the subscriptions
            follows = db.show_subs(message.from_user.id)
            for follow in follows:
                line = follow[1][0][0]
                with db.connection:
                    marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
                    marka = marka[0][0]
                min_price = follow[2][0][0]
                if follow[3][0][0] == None:
                    max_price = follow[2][0][0]*1000
                else:
                    max_price = follow[3][0][0]
                #model = "2114_samara"
                #url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
                url = f"your url"
                driver.get(url)
                source_data = driver.page_source
                soup = BeautifulSoup(source_data, 'lxml')
                time.sleep(5)
                print('Начинаю собирать информацию')  # "Starting to collect information"
                main_container = soup.find_all('div', class_=re.compile('iva-item-content*'))
                for index, content in enumerate(main_container):
                    contaier_of_content = content.find("div", class_=re.compile("iva-item-body*"))
                    ad_post = contaier_of_content.find("div", {"data-marker": "item-line"})
                    fresh_car = contaier_of_content.find("div", {"data-marker": "item-date"}).text.split()[1]
                    price = content.find('span', class_=re.compile('price-price-*')).find('meta', itemprop="price")['content']
                    if int(price) <= max_price and int(price) >= min_price:  # check that the listing is from today and that the price fits the range
                        title_info = contaier_of_content.find('a', class_=re.compile('iva-item-title*'))['title'].split(',')
                        town_info = contaier_of_content.find('div', class_=re.compile('geo-root*')).find('span').text
                        datePost_info = contaier_of_content.find('div', class_=re.compile('iva-item-dateInfo*')).find('div').text
                        link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
                        currency = content.find('span', class_=re.compile('price-price-*')).find('meta')['content']
                        car_info = title_info[0]
                        was_created = title_info[1]
                        city_on_sale = town_info
                        card = f'{hlink(car_info + " - " + was_created, "https://avito.ru" + link)}\n' \
                               f'{hbold("Город: ", city_on_sale)}\n' \
                               f'{hbold("Цена: ", price, currency)}'
                        await message.answer(card)
    except Exception as ex:
        print(ex)
    finally:
        print('Миссия выполнена, сэр!')  # "Mission accomplished, sir!"
@dp.message_handler(lambda message: message.text == "Курск")
async def avito_list(message: types.Message):
    try:
        # Check whether the user has any subscriptions at all
        if db.follows_exists(message.from_user.id):
            # Send out every listing that matches the subscriptions
            follows = db.show_subs(message.from_user.id)
            for follow in follows:
                line = follow[1][0][0]
                with db.connection:
                    marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
                    marka = marka[0][0]
                min_price = follow[2][0][0]
                if follow[3][0][0] == None:
                    max_price = follow[2][0][0]*1000
                else:
                    max_price = follow[3][0][0]
                #model = "2114_samara"
                #url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
                url = f"your url"
                driver.get(url)
                source_data = driver.page_source
                soup = BeautifulSoup(source_data, 'lxml')
                time.sleep(5)
                print('Начинаю собирать информацию')  # "Starting to collect information"
                main_container = soup.find_all('div', class_=re.compile('iva-item-content*'))
                for index, content in enumerate(main_container):
                    contaier_of_content = content.find("div", class_=re.compile("iva-item-body*"))
                    ad_post = contaier_of_content.find("div", {"data-marker": "item-line"})
                    fresh_car = contaier_of_content.find("div", {"data-marker": "item-date"}).text.split()[1]
                    price = content.find('span', class_=re.compile('price-price-*')).find('meta', itemprop="price")['content']
                    if int(price) <= max_price and int(price) >= min_price:  # check that the listing is from today and that the price fits the range
                        title_info = contaier_of_content.find('a', class_=re.compile('iva-item-title*'))['title'].split(',')
                        town_info = contaier_of_content.find('div', class_=re.compile('geo-root*')).find('span').text
                        datePost_info = contaier_of_content.find('div', class_=re.compile('iva-item-dateInfo*')).find('div').text
                        link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
                        currency = content.find('span', class_=re.compile('price-price-*')).find('meta')['content']
                        car_info = title_info[0]
                        was_created = title_info[1]
                        city_on_sale = town_info
                        card = f'{hlink(car_info + " - " + was_created, "https://avito.ru" + link)}\n' \
                               f'{hbold("Город: ", city_on_sale)}\n' \
                               f'{hbold("Цена: ", price, currency)}'
                        await message.answer(card)
    except Exception as ex:
        print(ex)
    finally:
        print('Миссия выполнена, сэр!')  # "Mission accomplished, sir!"
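Editorial note, not part of the original answer: the three handlers above are identical except for the city string in the message filter, so the same logic could be registered once per city. A sketch of that de-duplication follows; it assumes the same dp and types imports used above, and parse_avito_for is a hypothetical name whose body would be the shared scraping code the answer repeats three times.

CITIES = ["Белгород", "Воронеж", "Курск"]

async def parse_avito_for(message: types.Message):
    # Hypothetical shared coroutine: the body would be the subscription lookup,
    # the Selenium page load, and the bs4 parsing repeated verbatim in the handlers above.
    ...

for city_name in CITIES:
    # Register the same coroutine with a per-city text filter (aiogram 2.x API).
    dp.register_message_handler(parse_avito_for, lambda m, c=city_name: m.text == c)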