Issue
Variables
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

chrome_path = 'chromedriver'
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--disable-popup-blocking")
driver = webdriver.Chrome(service=Service(chrome_path), options=chrome_options)

driver.get("https://gibiru.com/")
search_box = driver.find_element(By.CSS_SELECTOR, '.form-control.has-feedback.has-clear')
search_box.click()
search_box.send_keys("lfc")
search_box.send_keys(Keys.RETURN)
driver.find_element(By.XPATH, "/html/body/div[1]/main/div[1]/div/div/div/div[2]").click()
time.sleep(2)
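As an aside, fixed time.sleep() pauses are flaky; Selenium's explicit waits block only as long as needed. A minimal sketch of the same search-box interaction, assuming the same selector (WebDriverWait and expected_conditions are standard Selenium imports):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait up to 10 seconds for the search box to become clickable, then use it
search_box = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.form-control.has-feedback.has-clear'))
)
search_box.send_keys("lfc")
search_box.send_keys(Keys.RETURN)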
I have this try-statement, which works perfectly, but it needs to be looped according to the value of page_length, which is equal to 10 in this situation.
try:
    # 1st page
    page_length = len(driver.find_elements(By.CSS_SELECTOR, "div.gsc-resultsRoot.gsc-tabData.gsc-tabdActive div.gsc-cursor-box.gs-bidi-start-align div.gsc-cursor div.gsc-cursor-page"))
    index_count = 0
    current_page = int(page_length) - int(index_count)
    print("Number of available pages : " + str(current_page))  # prints 10
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    index_count += 1
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index_count) + ']').click()
    time.sleep(2)

    # 2nd page
    current_page = int(page_length) - int(index_count)
    print("Number of available pages : " + str(current_page))  # prints 9 now
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    index_count += 1
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index_count) + ']').click()
    time.sleep(2)
except Exception as e:
    print(e)
    driver.quit()
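Since the two page blocks above are identical, the repeated scraping work can be pulled into a helper before looping. A minimal sketch, reusing the selectors from the question (scrape_page_images is a hypothetical name):

def scrape_page_images(driver, out_path="txt.txt"):
    # collect the src of every scalable result image on the current page
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    # append them to the output file, one URL per line
    with open(out_path, "a") as text_file:
        for my_href in find_href:
            src = my_href.get_attribute("src")
            text_file.write(str(src) + "\n")
            print(src)

Each page then only needs a scrape_page_images(driver) call followed by the click to the next cursor page.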
But I would like help creating a for-loop that does what the try-statement does, in fewer lines of code. This is what I'm thinking of:
for x in page_array_number:
    index_count = 0
    current_page = int(page_length) - int(index_count)
    print("Number of available pages : " + str(current_page))
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    print("Counter is before : " + str(index_count))
    index_count += 1
    print("Counter is after : " + str(index_count))
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    time.sleep(2)
    driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index_count) + ']').click()
    time.sleep(2)
    if index_count == page_length:
        print("Done scraping urls from " + str(page_length) + " pages")
        break
The output I am getting suggests that the counter is the problem: it does not add 1 on every loop.
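For what it's worth, the counter stalls because index_count = 0 sits inside the loop body, so it is reset to zero at the start of every iteration. A minimal sketch of that single fix, with everything else unchanged:

index_count = 0  # initialize once, before the loop, not inside it
for x in page_array_number:
    current_page = int(page_length) - int(index_count)
    print("Number of available pages : " + str(current_page))
    # ... same scraping and pagination body as above ...
    index_count += 1  # now advances on every pass: 1, 2, 3, ...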
Solution
I got it working with this for loop:
for index, item in enumerate(page_array_number):
    print(index)
    current_page = int(page_length) - int(index)
    print("Number of available pages : " + str(current_page))
    index = index + 1
    print("Counter is after : " + str(index))
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    time.sleep(2)
    if index == page_length:
        # checked first: on the last page there is no next cursor entry to click,
        # and a later "index > 1" branch would otherwise shadow this break
        print("Done scraping urls from " + str(page_length) + " pages")
        break
    elif index == 1:
        driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index + 1) + ']').click()
        time.sleep(2)
    else:
        driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index) + ']').click()
        time.sleep(2)
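A follow-up note on the absolute XPath: the same div.gsc-cursor-page elements the question uses to count pages can also be clicked by position, which tends to survive layout changes better than a full path from /html/body. A hedged sketch of that alternative (same selector as above, not verified against the live page):

# click the next results page by its position in the cursor instead of an absolute XPath
pages = driver.find_elements(By.CSS_SELECTOR, "div.gsc-cursor div.gsc-cursor-page")
if index < len(pages):
    pages[index].click()  # pages[index] is the entry after the current 1-based page `index`
    time.sleep(2)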
Answered By - AnxiousLuna