使用 Python 和 selenium 对日期选择器进行 Web 抓取

Web-Scraping a date selector with Python and selenium

提问人:Yamar Lyons 提问时间:11/16/2023 更新时间:11/16/2023 访问量:16

问:

我正在为我的大学期末考试做一个 Expedia 网络抓取项目。我一直在努力使用我的代码,在日历上选择实际日期,它打开日历并循环播放。我知道这与我无法与咏叹调标签交互有关,而且我很确定这与日历在网格系统上而不是按钮有关?但我不知道该怎么办。 这是我的代码块

trip_date_xpath = '//td[contains(@class="uitk-day" and @aria-label, "{}")]'.format(trip_date)
departing_date_element = ""
while departing_date_element == "":
    try:
        departing_date_element = WebDriverWait(driver,3).until(
        EC.presence_of_element_located((By.XPATH, trip_date_xpath))
        )
        departing_date_element.click() #Click on the departure date
        time.sleep(1)
    except TimeoutException:
       departing_date_element=""
       next_month_xpath = "//button[@data-stid='uitk-calendar-navigation-controls-next-button']"
       driver.find_element("xpath",next_month_xpath).click()
       time.sleep(1)

depart_date_done_xpath = "//button[@class='uitk-button uitk-button-medium uitk-button-has-text uitk-button-primary uitk-layout-flex-item']"
driver.find_element("xpath",depart_date_done_xpath).click()
#**********************  Complete Departure Date Portion  **********************
`

我还将留下一个指向 expedia 的链接,以便您可以自己测试。https://www.expedia.com/这是完整的代码......

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import pandas as pd

import smtplib
from email.message import EmailMessage

import schedule

departure_flight_inputs = {'Departure': "ORD",
                       'Arrival': "LAS",
                       'Date': "Saturday, June 1, 2024"}

return_flight_inputs = {'Departure': "LAS",
                    'Arrival': "ORD",
                    'Date': "Saturday, June 8, 2024"}

def find_cheapest_flights(flight_info):
PATH = R"C:\Users\theya\OneDrive\Desktop\FlightPriceTracker\chromedriver.exe"
chrome_options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=webdriver.ChromeService(executable_path=PATH), 
options=chrome_options)

leaving_from = flight_info['Departure']
going_to = flight_info['Arrival']
trip_date = flight_info['Date']

driver.maximize_window()
#Go to Expedia
driver.get("https://expedia.com")


#Click on Flights

flight_xpath = "//span[normalize-space()='Flights']"
flight_element = WebDriverWait(driver,5).until(
    EC.presence_of_element_located((By.XPATH, flight_xpath))
    )
flight_element.click()
time.sleep(0.2)


#Click on One-Way. I prefer one way flights
oneway_xpath = "//span[normalize-space()='One-way']"
one_way_element = WebDriverWait(driver,5).until(
    EC.presence_of_element_located((By.XPATH, oneway_xpath))
    )
one_way_element.click()
time.sleep(0.2)


#Part 1: Flying From, Flying To, Departure Date, Return Date

#**********************  Complete Leaving From Portion  **********************
leaving_from_xpath = "//button[@aria-label='Leaving from']"
leaving_from_element = WebDriverWait(driver,5).until(
    EC.presence_of_element_located((By.XPATH, leaving_from_xpath))
    )
leaving_from_element.click()

#Added the input so I would be able to write orignal code only allowed it to click.
leaving_from_input_xpath = "//input[@id='origin_select']"
leaving_from_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, leaving_from_input_xpath))
    )

leaving_from_input.clear
leaving_from_input.send_keys(leaving_from)
time.sleep(1)

time.sleep(1) #Need this otherwise it will be too fast for the broswer
leaving_from_input.send_keys(Keys.RETURN)
#**********************  Complete Leaving From Portion  **********************



#**********************  Complete Going To Portion  **********************
going_to_xpath = "//button[@aria-label='Going to']"
going_to_element = WebDriverWait(driver,5).until(
    EC.presence_of_element_located((By.XPATH, going_to_xpath))
    )

going_to_element.click()

going_to_input_xpath = "//input[@id='destination_select']"
going_to_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, going_to_input_xpath))
    )

going_to_input.clear
going_to_input.send_keys(going_to)
time.sleep(1)

time.sleep(1) #Need this otherwise it will be too fast for the broswer
going_to_input.send_keys( Keys.RETURN) #Presses the return key 
#**********************  Complete Going To Portion  **********************



#**********************  Complete Departure Date Portion  **********************
departing_box_xpath = "//button[contains (@aria-label,'Date')]"

depart_box_element = WebDriverWait(driver,5).until(
    EC.presence_of_element_located((By.XPATH, departing_box_xpath))
    )

depart_box_element.click() #Click on the departure box
time.sleep(2)


#Find the current date. WILL arrow through too


trip_date_xpath = '//td[contains(@class="uitk-day" and @aria-label, "{}")]'.format(trip_date)
departing_date_element = ""
while departing_date_element == "":
    try:
        departing_date_element = WebDriverWait(driver,3).until(
        EC.presence_of_element_located((By.XPATH, trip_date_xpath))
        )
        departing_date_element.click() #Click on the departure date
        time.sleep(1)
    except TimeoutException:
       departing_date_element=""
       next_month_xpath = "//button[@data-stid='uitk-calendar-navigation-controls-next-button']"
       driver.find_element("xpath",next_month_xpath).click()
       time.sleep(1)

depart_date_done_xpath = "//button[@class='uitk-button uitk-button-medium uitk-button-has-text uitk-button-primary uitk-layout-flex-item']"
driver.find_element("xpath",depart_date_done_xpath).click()
#**********************  Complete Departure Date Portion  **********************


#**********************  Click Search  **********************
search_button_xpath = '//button[@data-testid="submit-button"]'
driver.find_element_by_xpath(search_button_xpath).click()
time.sleep(15) #Need to let the page load properly
#**********************  Click Search  **********************






#Part 2: Setting Conditions for our flight

#**********************  Check for Nonstop Flights Sorted by Lowest Price  **********************
nonstop_flight_xpath = '//input[@id="stops-0"]'
one_stop_flight_xpath = '//input[@id="stops-1"]'


if len(driver.find_elements_by_xpath(nonstop_flight_xpath)) > 0:
    
    driver.find_element_by_xpath(nonstop_flight_xpath).click()
    time.sleep(5)
    
    #Check if there are available flights
    available_flights = driver.find_elements_by_xpath("//span[contains(text(),'Select and show fare information ')]")
    if len(available_flights) >  0:
        if len(available_flights) == 1: #Don't have to sort by prices here
            flights = [(item.text.split(",")[0].split('for')[-1].title(),
                        item.text.split(",")[1].title().replace("At",":"),
                        item.text.split(",")[2].title().replace("At",":"),
                        item.text.split(",")[3].title().replace("At",":")) for item in available_flights[0:5]]

        else:
            #Sort by lowest prices
            driver.find_element_by_xpath('//option[@data-opt-id="PRICE_INCREASING"]').click()
            time.sleep(5)
            flights = [(item.text.split(",")[0].split('for')[-1].title(),
                        item.text.split(",")[1].title().replace("At",":"),
                        item.text.split(",")[2].title().replace("At",":"),
                        item.text.split(",")[3].title().replace("At",":")) for item in available_flights[0:5]]
        
        
        print("Conditions satisfied for: {}:{}, {}:{}, {}:{}".format("Departure",leaving_from,
                                                         "Arrival",going_to,
                                                         "Date",trip_date))
        driver.quit()
        return flights
else:
    print('Not all conditions could be met for the following: "{}:{}, {}:{}, {}:{}'.format("Departure",leaving_from,
                                                                                         "Arrival",going_to,
                                                                                         "Date",trip_date))
    driver.quit()
    return []



#**********************  Check for Nonstop Flights Sorted by Lowest Price  **********************



def send_email():
#Get return values
departing_flights = find_cheapest_flights(departure_flight_inputs)
return_flights = find_cheapest_flights(return_flight_inputs)

#Put it into a dataframe to visualize this more easily
df = pd.DataFrame(departing_flights + return_flights)

if not df.empty: #Only send an email if we have actual flight info
    email = open('Your Email Here').read()
    password=open('Your Password Here').read()
    
    msg = EmailMessage()
    
    msg['Subject'] = "Python Flight Info! {} --> {}, Departing: {}, Returning: {}".format(departure_flight_inputs['Departure'], departure_flight_inputs['Arrival'], departure_flight_inputs['Date'],return_flight_inputs['Date'])
    
    msg['From'] = email
    msg['To'] = email
    
    msg.add_alternative('''\
        <!DOCTYPE html>
        <html>
            <body>
                {}
            </body>
        </html>'''.format(df.to_html()), subtype="html")

        
    with smtplib.SMTP_SSL('Email server name here',465) as smtp:
        smtp.login(email,password)
        smtp.send_message(msg)


schedule.clear()
schedule.every(.1).minutes.do(send_email)

while True:
schedule.run_pending()
time.sleep(1)
python selenium-webdriver web-scraping 自动化

评论


答: 暂无答案