提问人:Yamar Lyons 提问时间:11/16/2023 更新时间:11/16/2023 访问量:16
使用 Python 和 selenium 对日期选择器进行 Web 抓取
Web-Scraping a date selector with Python and selenium
问:
我正在为我的大学期末考试做一个 Expedia 网络抓取项目。我一直在努力使用我的代码,在日历上选择实际日期,它打开日历并循环播放。我知道这与我无法与咏叹调标签交互有关,而且我很确定这与日历在网格系统上而不是按钮有关?但我不知道该怎么办。 这是我的代码块
trip_date_xpath = '//td[contains(@class="uitk-day" and @aria-label, "{}")]'.format(trip_date)
departing_date_element = ""
while departing_date_element == "":
try:
departing_date_element = WebDriverWait(driver,3).until(
EC.presence_of_element_located((By.XPATH, trip_date_xpath))
)
departing_date_element.click() #Click on the departure date
time.sleep(1)
except TimeoutException:
departing_date_element=""
next_month_xpath = "//button[@data-stid='uitk-calendar-navigation-controls-next-button']"
driver.find_element("xpath",next_month_xpath).click()
time.sleep(1)
depart_date_done_xpath = "//button[@class='uitk-button uitk-button-medium uitk-button-has-text uitk-button-primary uitk-layout-flex-item']"
driver.find_element("xpath",depart_date_done_xpath).click()
#********************** Complete Departure Date Portion **********************
`
我还将留下一个指向 expedia 的链接,以便您可以自己测试。https://www.expedia.com/这是完整的代码......
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import pandas as pd
import smtplib
from email.message import EmailMessage
import schedule
departure_flight_inputs = {'Departure': "ORD",
'Arrival': "LAS",
'Date': "Saturday, June 1, 2024"}
return_flight_inputs = {'Departure': "LAS",
'Arrival': "ORD",
'Date': "Saturday, June 8, 2024"}
def find_cheapest_flights(flight_info):
PATH = R"C:\Users\theya\OneDrive\Desktop\FlightPriceTracker\chromedriver.exe"
chrome_options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=webdriver.ChromeService(executable_path=PATH),
options=chrome_options)
leaving_from = flight_info['Departure']
going_to = flight_info['Arrival']
trip_date = flight_info['Date']
driver.maximize_window()
#Go to Expedia
driver.get("https://expedia.com")
#Click on Flights
flight_xpath = "//span[normalize-space()='Flights']"
flight_element = WebDriverWait(driver,5).until(
EC.presence_of_element_located((By.XPATH, flight_xpath))
)
flight_element.click()
time.sleep(0.2)
#Click on One-Way. I prefer one way flights
oneway_xpath = "//span[normalize-space()='One-way']"
one_way_element = WebDriverWait(driver,5).until(
EC.presence_of_element_located((By.XPATH, oneway_xpath))
)
one_way_element.click()
time.sleep(0.2)
#Part 1: Flying From, Flying To, Departure Date, Return Date
#********************** Complete Leaving From Portion **********************
leaving_from_xpath = "//button[@aria-label='Leaving from']"
leaving_from_element = WebDriverWait(driver,5).until(
EC.presence_of_element_located((By.XPATH, leaving_from_xpath))
)
leaving_from_element.click()
#Added the input so I would be able to write orignal code only allowed it to click.
leaving_from_input_xpath = "//input[@id='origin_select']"
leaving_from_input = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, leaving_from_input_xpath))
)
leaving_from_input.clear
leaving_from_input.send_keys(leaving_from)
time.sleep(1)
time.sleep(1) #Need this otherwise it will be too fast for the broswer
leaving_from_input.send_keys(Keys.RETURN)
#********************** Complete Leaving From Portion **********************
#********************** Complete Going To Portion **********************
going_to_xpath = "//button[@aria-label='Going to']"
going_to_element = WebDriverWait(driver,5).until(
EC.presence_of_element_located((By.XPATH, going_to_xpath))
)
going_to_element.click()
going_to_input_xpath = "//input[@id='destination_select']"
going_to_input = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, going_to_input_xpath))
)
going_to_input.clear
going_to_input.send_keys(going_to)
time.sleep(1)
time.sleep(1) #Need this otherwise it will be too fast for the broswer
going_to_input.send_keys( Keys.RETURN) #Presses the return key
#********************** Complete Going To Portion **********************
#********************** Complete Departure Date Portion **********************
departing_box_xpath = "//button[contains (@aria-label,'Date')]"
depart_box_element = WebDriverWait(driver,5).until(
EC.presence_of_element_located((By.XPATH, departing_box_xpath))
)
depart_box_element.click() #Click on the departure box
time.sleep(2)
#Find the current date. WILL arrow through too
trip_date_xpath = '//td[contains(@class="uitk-day" and @aria-label, "{}")]'.format(trip_date)
departing_date_element = ""
while departing_date_element == "":
try:
departing_date_element = WebDriverWait(driver,3).until(
EC.presence_of_element_located((By.XPATH, trip_date_xpath))
)
departing_date_element.click() #Click on the departure date
time.sleep(1)
except TimeoutException:
departing_date_element=""
next_month_xpath = "//button[@data-stid='uitk-calendar-navigation-controls-next-button']"
driver.find_element("xpath",next_month_xpath).click()
time.sleep(1)
depart_date_done_xpath = "//button[@class='uitk-button uitk-button-medium uitk-button-has-text uitk-button-primary uitk-layout-flex-item']"
driver.find_element("xpath",depart_date_done_xpath).click()
#********************** Complete Departure Date Portion **********************
#********************** Click Search **********************
search_button_xpath = '//button[@data-testid="submit-button"]'
driver.find_element_by_xpath(search_button_xpath).click()
time.sleep(15) #Need to let the page load properly
#********************** Click Search **********************
#Part 2: Setting Conditions for our flight
#********************** Check for Nonstop Flights Sorted by Lowest Price **********************
nonstop_flight_xpath = '//input[@id="stops-0"]'
one_stop_flight_xpath = '//input[@id="stops-1"]'
if len(driver.find_elements_by_xpath(nonstop_flight_xpath)) > 0:
driver.find_element_by_xpath(nonstop_flight_xpath).click()
time.sleep(5)
#Check if there are available flights
available_flights = driver.find_elements_by_xpath("//span[contains(text(),'Select and show fare information ')]")
if len(available_flights) > 0:
if len(available_flights) == 1: #Don't have to sort by prices here
flights = [(item.text.split(",")[0].split('for')[-1].title(),
item.text.split(",")[1].title().replace("At",":"),
item.text.split(",")[2].title().replace("At",":"),
item.text.split(",")[3].title().replace("At",":")) for item in available_flights[0:5]]
else:
#Sort by lowest prices
driver.find_element_by_xpath('//option[@data-opt-id="PRICE_INCREASING"]').click()
time.sleep(5)
flights = [(item.text.split(",")[0].split('for')[-1].title(),
item.text.split(",")[1].title().replace("At",":"),
item.text.split(",")[2].title().replace("At",":"),
item.text.split(",")[3].title().replace("At",":")) for item in available_flights[0:5]]
print("Conditions satisfied for: {}:{}, {}:{}, {}:{}".format("Departure",leaving_from,
"Arrival",going_to,
"Date",trip_date))
driver.quit()
return flights
else:
print('Not all conditions could be met for the following: "{}:{}, {}:{}, {}:{}'.format("Departure",leaving_from,
"Arrival",going_to,
"Date",trip_date))
driver.quit()
return []
#********************** Check for Nonstop Flights Sorted by Lowest Price **********************
def send_email():
#Get return values
departing_flights = find_cheapest_flights(departure_flight_inputs)
return_flights = find_cheapest_flights(return_flight_inputs)
#Put it into a dataframe to visualize this more easily
df = pd.DataFrame(departing_flights + return_flights)
if not df.empty: #Only send an email if we have actual flight info
email = open('Your Email Here').read()
password=open('Your Password Here').read()
msg = EmailMessage()
msg['Subject'] = "Python Flight Info! {} --> {}, Departing: {}, Returning: {}".format(departure_flight_inputs['Departure'], departure_flight_inputs['Arrival'], departure_flight_inputs['Date'],return_flight_inputs['Date'])
msg['From'] = email
msg['To'] = email
msg.add_alternative('''\
<!DOCTYPE html>
<html>
<body>
{}
</body>
</html>'''.format(df.to_html()), subtype="html")
with smtplib.SMTP_SSL('Email server name here',465) as smtp:
smtp.login(email,password)
smtp.send_message(msg)
schedule.clear()
schedule.every(.1).minutes.do(send_email)
while True:
schedule.run_pending()
time.sleep(1)
答: 暂无答案
下一个:自动导入 FBX 和重定向
评论