Issue
I am trying to scrape ethplorer.io, and I want to scrape many pages. My code is shown below, but with range(11,14) it scrapes page 11 three times instead of pages 11, 12 and 13. I can't understand why.
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
# Collect the text of every "local-link" element on holders pages 11-13
# and write the collected entries to all_data.txt, one per line.
driver = webdriver.Chrome()
base_url = "https://ethplorer.io/tr/address/0xf87e31492faf9a91b02ee0deaad50d51d56d5d4d#pageSize=100&tab=tab-holders&holders="
results = []
for page_number in range(11, 14):
    # NOTE(review): the page number sits after the "#", i.e. in the URL
    # fragment. Navigating to a URL that differs only in its fragment
    # presumably does not reload the document, which would explain why the
    # same page (11) is scraped on every iteration -- confirm in the browser.
    url = base_url + str(page_number)
    driver.get(url)
    data = driver.find_elements(By.CLASS_NAME, "local-link")
    for x in data:
        results.append(x.text)
# quit must be *called*; a bare `driver.quit` only references the method
# and leaves the browser running.
driver.quit()
with open("all_data.txt", "w") as file:
    for x in results:
        file.write(x + "\n")
Solution
I have made some modifications to your code so that it works across the several pages you are requesting and captures the text of the hyperlinks I assume you are targeting. Please check below:
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Scrape the holder links from pages FIRST_PAGE..LAST_PAGE of the
# token-holders tab and write their text to all_data.txt, one per line.
FIRST_PAGE = 11  # page opened directly through the URL
LAST_PAGE = 13   # last page to scrape (inclusive)

# Selenium 4 takes the driver path through a Service object; passing it
# positionally is no longer supported in current releases.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
url = "https://ethplorer.io/tr/address/0xf87e31492faf9a91b02ee0deaad50d51d56d5d4d#pageSize=100&tab=tab-holders&holders=" + str(FIRST_PAGE)
# Anchors inside the holders table whose class contains "local-link".
xpath = "//div[@id='token-holders-tab']//div[@id='address-token-holders']//div[@class='block']//table//tr//td//a[contains(@class,'local-link')]"
results = []
try:
    driver.get(url)
    for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
        if page_number != FIRST_PAGE:
            # Later pages are reached by clicking the pagination link rather
            # than by calling driver.get() with a new fragment.
            next_page = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (
                        By.XPATH,
                        "//div[@id='token-holders-tab']//tr[contains(@class,'paginationFooter')]//A[@class='page-link'][text()='" + str(page_number) + "']",
                    )
                )
            )
            # Click through JavaScript, as in the original answer.
            driver.execute_script("arguments[0].click();", next_page)
            # Give the table time to re-render before reading it.
            time.sleep(3)
        # Wait until at least one holder link is present, then read them all.
        links = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, xpath))
        )
        results.extend(link.text for link in links)
finally:
    # Always close the browser, even if a wait times out.
    driver.quit()

with open("all_data.txt", "w", encoding="utf-8") as file:
    for entry in results:
        file.write(entry + "\n")
Answered By - Rola
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.