How to get dynamic website content using Selenium?



  • I'm trying to scrape a dynamic website with Selenium, but I'm not able to get the page content.

    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from webdriver_manager.chrome import ChromeDriverManager
    from bs4 import BeautifulSoup
    import time
    
    driver = webdriver.Chrome(ChromeDriverManager().install())
    
    url = 'https://www.aodour.pk/brand/hemani'
    driver.get(url)
    time.sleep(30)  # wait for the page to load properly
    html_js = driver.page_source  # captured before the reload below
    
    # driver.find_element_by_tag_name('body').send_keys(Keys.ESCAPE)
    # I tried to skip the ad this way, but I don't know why it doesn't work
    
    driver.get(url)  # reload the page again (to try to skip the ad)
    time.sleep(30)
    soup = BeautifulSoup(html_js, 'html.parser')  # note: this parses the HTML captured before the reload
    

    I have two questions:

    Why am I not getting the page content?

    Why isn't the ESCAPE key working for me?
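
    For reference, keyboard input is usually dispatched through ActionChains rather than by sending keys to the body element. Below is a minimal sketch of that variant; whether this pop-up listens for keyboard events at all is an assumption, and it may only close via its close button:

    from selenium.webdriver.common.action_chains import ActionChains

    # Dispatch ESCAPE at the document level instead of to a specific element.
    # If the pop-up has no keydown handler, this will still have no effect.
    ActionChains(driver).send_keys(Keys.ESCAPE).perform()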



  • I would recommend not using time.sleep() with Selenium. Instead, use WebDriverWait with an expected condition such as presence_of_element_located():

    from selenium import webdriver
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.common.exceptions import TimeoutException
    from bs4 import BeautifulSoup
    
    DELAY = 30
    wd = webdriver.Chrome("<PATH_TO_CHROME_DRIVER>")  # substitute your local chromedriver path
    
    wd.maximize_window()
    url = 'https://www.aodour.pk/brand/hemani'
    wd.get(url)
    
    try:
        # waiting for pop-up
        landing_popup = WebDriverWait(wd, DELAY).until(EC.presence_of_element_located((By.CLASS_NAME, 'landing_popup')))
        # waiting for loader to vanish
        page_loader = WebDriverWait(wd, DELAY).until_not(EC.presence_of_element_located((By.CLASS_NAME, 'pageloader')))
        # waiting for button to be clickable
        close_button = WebDriverWait(wd, DELAY).until(EC.element_to_be_clickable((By.XPATH, '//button[./span[contains(@class, "icon-close")]]')))
        wd.execute_script("arguments[0].click();", close_button)
        print("Ad skipped")
    except TimeoutException:
        print("Ad pop-up didn't appear")
    except Exception as e:
        print("Error: " + str(e))
    
    
    html = wd.page_source
    soup = BeautifulSoup(html, 'html.parser')
    
    print(soup)
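
    Side note: in Selenium 4 the positional driver-path argument used above is deprecated. A minimal sketch of the Service-based equivalent, with the path still a placeholder to fill in:

    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service

    # Selenium 4 style: the driver path is wrapped in a Service object.
    wd = webdriver.Chrome(service=Service("<PATH_TO_CHROME_DRIVER>"))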
    
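    Once the pop-up is closed, the parsed soup can be queried like any static page. A minimal sketch, where the CSS selector is a hypothetical placeholder (inspect the live page for the class the product cards actually use):

    # "product-title" is a made-up selector for illustration only.
    for title in soup.select('.product-title'):
        print(title.get_text(strip=True))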

