Issue
I am trying to scrape the names and emails of agents from this site. The code first captures the link to every profile on the first page and then visits each profile to get the name and email. The problem is that it takes a lot of time to get the anchor tag containing the agent's name. Here's the code:
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
class MessageIndividual(webdriver.Chrome):
    """Chrome driver that prints the name and e-mail of agents listed on bhhs.com.

    Typical use is ``goToSite()`` followed by ``getDetails()``. When used as a
    context manager the browser is quit on exit only if ``teardown`` is true.
    """

    def __init__(self, driver_path=r";C:/SeleniumDriver", teardown=False):
        """Put the driver directory on PATH, start Chrome and maximize it.

        Args:
            driver_path: Directory that contains the chromedriver executable.
                A leading path separator (as in the default value) is
                tolerated and ignored.
            teardown: When true, ``__exit__`` quits the browser.
        """
        self.driver_path = driver_path
        self.teardown = teardown

        # Append the directory with the platform's separator instead of relying
        # on a hard-coded leading ";" in the argument, and skip the append when
        # the directory is already listed so repeated instantiation does not
        # keep growing PATH.
        driver_dir = driver_path.lstrip(";" + os.pathsep)
        current_path = os.environ.get("PATH", "")
        if driver_dir and driver_dir not in current_path.split(os.pathsep):
            if current_path and not current_path.endswith(os.pathsep):
                current_path += os.pathsep
            os.environ["PATH"] = current_path + driver_dir

        super().__init__()
        # No implicit wait is configured: every lookup in this class goes
        # through an explicit WebDriverWait, and combining the two makes each
        # poll block for the implicit timeout, so the effective wait becomes
        # longer than the explicit timeout that was asked for.
        self.maximize_window()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Quit the browser on leaving a ``with`` block, if ``teardown`` is set."""
        if self.teardown:
            self.quit()

    def goToSite(self):
        """Load the agent search results page."""
        url = 'https://www.bhhs.com/agent-search-results'
        self.get(url)

    def getDetails(self):
        """Print the name and e-mail shown on each listed agent's profile page.

        The profile links are collected from the currently loaded results page;
        each profile is then opened in a temporary tab that is closed again
        before the next one is visited.

        Raises:
            selenium.common.exceptions.TimeoutException: If the result links,
                an agent name or an agent e-mail do not appear in time.
        """
        # 1000 s is only an upper bound; the wait returns as soon as the links
        # are visible.
        links = WebDriverWait(self, 1000).until(
            EC.visibility_of_all_elements_located((
                By.XPATH,
                "//section[@class='cmp-agent-results-list-view']"
                "/div[@class='cmp-agent-results-list-view__content container ']"
                "/div[@class='row associate pt-3 pb-3 ']"
                "/div[@class='col-6 col-sm-4 col-lg-3 order-lg-3 associate__btn-group']"
                "/section[2]/a[@href]",
            ))
        )
        # Read the hrefs up front, before any navigation happens.
        profile_urls = [link.get_attribute("href") for link in links]

        results_handle = self.current_window_handle
        for profile_url in profile_urls:
            self.execute_script("window.open('');")
            self.switch_to.window(self.window_handles[-1])
            try:
                self.get(profile_url)
                name = WebDriverWait(self, 5).until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//h1[@class="cmp-agent__name"]/a[1]'))
                )
                print(name.text)
                email = WebDriverWait(self, 1).until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, 'cmp-agent-details__mail'))
                )
                print(email.text)
            finally:
                # Always close the temporary tab and return to the results
                # window, so a timeout does not leak tabs or leave the driver
                # focused on the wrong window.
                self.close()
                self.switch_to.window(results_handle)
if __name__ == '__main__':
    # Run the scrape inside a ``with`` block so that MessageIndividual.__exit__
    # is actually invoked and the ``teardown`` flag takes effect. With
    # teardown=False the browser is left open afterwards; pass teardown=True to
    # have it quit automatically, even when the scrape raises.
    with MessageIndividual(teardown=False) as inst:
        inst.goToSite()
        inst.getDetails()
Is there any way I can scrape the name in less time?
Solution
I have changed the XPath used to identify the anchor tag and removed the new window that was opened in each iteration. I hope this will reduce the time somewhat.
def getDetails(self):
    """Print the name and e-mail found on each agent profile page.

    Collects the ``href`` of every visible anchor that contains a span whose
    normalized text is ``agent details``, then loads each of those URLs in
    turn and prints the text of the name anchor and of the e-mail element.
    """
    link_locator = (By.XPATH, "//a[.//span[normalize-space(.)='agent details']]")
    name_locator = (By.XPATH, '//h1[@class="cmp-agent__name"]/a[1]')
    email_locator = (By.CLASS_NAME, 'cmp-agent-details__mail')

    anchors = WebDriverWait(self, 1000).until(
        EC.visibility_of_all_elements_located(link_locator)
    )
    profile_urls = []
    for anchor in anchors:
        profile_urls.append(anchor.get_attribute("href"))

    for profile_url in profile_urls:
        # Each profile is loaded in the current window; no extra window is
        # opened, switched to, or closed per profile.
        self.get(profile_url)

        name_element = WebDriverWait(self, 5).until(
            EC.presence_of_element_located(name_locator)
        )
        print(name_element.text)

        email_element = WebDriverWait(self, 1).until(
            EC.presence_of_element_located(email_locator)
        )
        print(email_element.text)
Answered By - KunduK
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.