How to get dynamic website content using Selenium?
-
I'm trying to scrape a dynamic website with Selenium, but I'm not able to get the page content.
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from webdriver_manager.chrome import ChromeDriverManager
    from bs4 import BeautifulSoup
    import time

    driver = webdriver.Chrome(ChromeDriverManager().install())

    url = 'https://www.aodour.pk/brand/hemani'
    driver.get(url)
    time.sleep(30)  # for loading the page properly
    html_js = driver.page_source
    #driver.find_element_by_tag_name('body').send_keys(Keys.ESCAPE)  # I tried to skip the ad this way, but it doesn't work and I don't know why
    driver.get(url)  # try to reload the page again (to skip the ad)
    time.sleep(30)
    soup = BeautifulSoup(html_js, 'html.parser')
I have two questions:
Why am I not getting the page content?
Why isn't the ESCAPE key working for me?
-
I would recommend not using time.sleep() with Selenium. Instead, use WebDriverWait with an expected condition such as presence_of_element_located():
    from selenium import webdriver
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.common.exceptions import TimeoutException
    from bs4 import BeautifulSoup

    DELAY = 30
    wd = webdriver.Chrome("<PATH_TO_CHROME_DRIVER>")
    wd.maximize_window()

    url = 'https://www.aodour.pk/brand/hemani'
    wd.get(url)

    try:
        # waiting for the pop-up to appear
        landing_popup = WebDriverWait(wd, DELAY).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'landing_popup')))
        # waiting for the loader to vanish
        page_loader = WebDriverWait(wd, DELAY).until_not(
            EC.presence_of_element_located((By.CLASS_NAME, 'pageloader')))
        # waiting for the close button to be clickable
        close_button = WebDriverWait(wd, DELAY).until(
            EC.element_to_be_clickable((By.XPATH, '//button[./span[contains(@class, "icon-close")]]')))
        wd.execute_script("arguments[0].click();", close_button)
        print("Ad skipped")
    except TimeoutException:
        print("Ad pop-up didn't appear")
    except Exception as e:
        print("Error: " + str(e))

    html = wd.page_source
    soup = BeautifulSoup(html, 'html.parser')
    print(soup)
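Two notes on the approach above. The click is dispatched through execute_script() because a plain close_button.click() can raise ElementClickInterceptedException while the page-loader overlay is still covering the button; the until_not() wait is what lets that overlay disappear first. As for your second question: sending ESCAPE only dismisses a pop-up if the site registers a key handler for it, and most likely this one doesn't, so clicking its close button is the reliable route.

Once the pop-up is gone, you can query the soup as usual. A minimal sketch (the product-card selectors below are assumptions for illustration, not verified against the live page; inspect the actual markup to find the real class names):

    # Hypothetical selectors - check the page's actual markup before using
    for product in soup.select('div.product-item'):
        name = product.select_one('a.product-name')
        price = product.select_one('span.price')
        if name and price:
            print(name.get_text(strip=True), price.get_text(strip=True))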