Issue
The question is about selenium webdriver's explicitly wait feature which is same locating action with a timeout. Webdriver waits element to be visible or present. In my situation, I locate elements with explicit waiting and without it. When it is without the explicit wait, it founds and assigns it. But below, I also try to locate element explicitly waiting, that variable is unassigned, and in such form passes to for iteration, and my for iteration gives UnboundLocalError
I know that error is not selenium but python based. I can get rid of that error by using instead of pass
y_priceNewProds = x_priceNewProds` but I need to understand why selenium webdriver cannot locate the element with wait if it already a line above found it. There must be cookies also.
code is below;
import pandas as pd
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
#reading from csv file url-s
def readCSV(path_csv):
df=pd.read_csv(path_csv)
return df
fileCSV=readCSV(r'C:\Users\Admin\Downloads\urls.csv')
length_of_column_urls=fileCSV['linkamazon'].last_valid_index()
def create_driver():
chrome_options = Options()
chrome_options.headless = True
chrome_options.add_argument("start-maximized")
# options.add_experimental_option("detach", True)
chrome_options.add_argument("--no-sandbox")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
webdriver_service = Service(r'C:\pythonPro\w_crawl\AmznScrpBot\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
return driver
#going to urls 1-by-1
def goToUrl_Se(driver):
global counter
counter = 0
for i in range(0, length_of_column_urls + 1):
xUrl = fileCSV.iloc[i, 1]
print(xUrl,i)
# going to url(amazn) via Selenium WebDriver
driver.get(xUrl)
parse_data()
counter+=1
driver.quit()
#fetch-parse the data from url page
def parse_data():
global asin, title, bookform, priceNewProd
wait=WebDriverWait(driver,timeout=20)
try:
#trying code snippets where locating elements occur by XPATH, in some ofthem
#there is also WebDriver explicit wait, that element has both methods
#with presence_of element wait method and without explicit wait method. Just trying to understand
#why sometimes elements cannot be located on page
y_data_index=wait.until(EC.presence_of_element_located((By.XPATH,"//div[@data-index='1']")))
print(y_data_index.text,'y_index')
x_data_index=wait.until(EC.visibility_of_element_located((By.XPATH,"//div[@data-index='1']")))
print(x_data_index.text,'x_index')
x_titles=driver.find_elements(By.XPATH,'//span[contains(@class,"a-size-medium")]')
x_bookforms = driver.find_elements(By.XPATH,'//div[contains(@class,"a-section a-spacing-none a-spacing-top-micro s-price-instructions-style")]//div[contains(@class,"a-row a-size-base a-color-base")]//a[contains(@class,"a-size-base a-link-normal puis-medium-weight-text s-underline-text s-underline-link-text s-link-style s-link-centralized-style a-text-bold")]')
x_priceNewProds = driver.find_elements(By.XPATH,'//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')
y_priceNewProds=wait.until(EC.visibility_of_all_elements_located((By.XPATH,'//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
except:
pass
for i in range(len(x_titles)):
x_title=x_titles[i].text
x_priceNewProd = x_priceNewProds[i].text
print(x_priceNewProd)
y_priceNewProd=y_priceNewProds[i].text
print(y_priceNewProd)
try:
x_bookform=x_bookforms[i].text
print(x_bookform)
except:
x_bookform='-'
title = x_title
bookform = x_bookform
priceNewProd = x_priceNewProd
write_to_csv()
def write_to_csv():
if counter==0:
df=pd.DataFrame({'Products':title,'Bookform':bookform,'Price':priceNewProd},index=[0])
wr=df.to_csv('results_new00.csv',index=False,mode='w')
else:
df=pd.DataFrame({'Products':title,'Bookform':bookform,'Price':priceNewProd},index=[0])
wr=df.to_csv('results_new00.csv',index=False,mode='a',header=False)
driver=create_driver()
goToUrl_Se(driver)
y_priceNewProd=y_priceNewProds[i].text UnboundLocalError: local variable 'y_priceNewProds' referenced before assignment
Solution
Since you didn't share all the code including the link we can't know what exactly happens there, but I guess that this line
wait.until(EC.visibility_of_all_elements_located((By.XPATH,'//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
Throws exception since that element(s) are / is not visible.
This is why y_priceNewProds
remains not assigned any value.
While if you use simple driver.find_elements
method it can find those elements since they are existing but, again, not visible.
UPD
After you shared the link I tested that and now I can say: Yes, I was right!
Those elements are not visible.
The following code throws TimeoutException
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.amazon.com/s?i=stripbooks&bbn=1&rh=n%3A1%2Cp_n_feature_eighteen_browse-bin%3A8622846011&dc&fs=true&ds=v1%3AgAO0%2BZc%2BC6RthRkqyWzOHmDVufv7JbuCK96Grvjle68&qid=1665559431&rnid=8622845011&ref=sr_nr_p_n_feature_eighteen_browse-bin_5'
driver.get(url)
wait = WebDriverWait(driver, 20)
m_list = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
print(m_list)
UPD2
But if you will take the direct parent of that element it will work correctly since it is visible.
For the following code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.amazon.com/s?i=stripbooks&bbn=1&rh=n%3A1%2Cp_n_feature_eighteen_browse-bin%3A8622846011&dc&fs=true&ds=v1%3AgAO0%2BZc%2BC6RthRkqyWzOHmDVufv7JbuCK96Grvjle68&qid=1665559431&rnid=8622845011&ref=sr_nr_p_n_feature_eighteen_browse-bin_5'
driver.get(url)
wait = WebDriverWait(driver, 20)
m_list = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]')))
print(len(m_list))
the output is:
27
Answered By - Prophet
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.