Issue
The page lists 3821 links, but my script only returns 103 of them. I also tried scrolling to the
bottom of the page with window.scrollTo so that all the links would load, but that did not work.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import ssl
import undetected_chromedriver as uc
import requests
from bs4 import BeautifulSoup
import re
# Replace ssl's default HTTPS context factory with the unverified one, so HTTPS
# connections that use the default context skip certificate verification.
# NOTE(review): this applies to the whole process and is insecure; presumably a
# workaround for certificate errors on this machine — confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
# ... Rest of your code ...
# Open the licence search page in Chrome, submit the search, and print every
# result link followed by the number of links found.
base_url = "http://www.servicealberta.gov.ab.ca/"
options = uc.ChromeOptions()
driver = uc.Chrome(options=options)
try:
    driver.get("http://www.servicealberta.gov.ab.ca/find-if-business-is-licenced.cfm")
    # Click the button to load initial content
    click_on_button = driver.find_element(By.CSS_SELECTOR, "td:nth-child(1) input:nth-child(1)")
    click_on_button.click()
    # Fixed pause after the click; presumably gives the results time to render.
    time.sleep(2)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Snapshot the rendered HTML while the browser session is still alive.
    data = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # Always close the browser, even if a step above fails.
    driver.quit()

links = data.select("tbody td[colspan='4'] a")
for link in links:
    # Join with exactly one slash: the hrefs already start with "/".
    url = base_url.rstrip("/") + "/" + link["href"].lstrip("/")
    print(url)
# Report how many links were found (not the length of the last URL string).
print(len(links))
Solution
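The search results are returned by a plain form POST, so you can request them directly with
requests instead of driving a browser. To get all links from the page into a pandas DataFrame,
you can use this example: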
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Submit the search form directly and collect every result row into a DataFrame.
api_url = "http://www.servicealberta.gov.ab.ca/find-if-business-is-licenced.cfm"
# Form fields sent with the POST: empty name and municipality, business type "all".
payload = {"faction": "SearchResults", "BusName": "", "BusType": "all", "BusMunc": ""}

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as results.
response = requests.post(api_url, data=payload, timeout=60)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

all_data = []
# Each result is a td[colspan="4"] cell holding the link; the next two <td>
# elements in document order are read as the business type and the area.
for row in soup.select('td[colspan="4"]:has(a)'):
    link = row.a["href"]
    name = row.a.get_text(strip=True, separator=" ")
    type_cell = row.find_next("td")
    area_cell = type_cell.find_next("td")
    all_data.append(
        {
            "name": name,
            "type": type_cell.get_text(strip=True),
            "area": area_cell.get_text(strip=True),
            "link": link,
        }
    )

df = pd.DataFrame(all_data)
print(df.tail())
Prints:
name type area link
3816 ZOUPPAS BARRY SCOTT d.b.a ARDCO CONSTRUCTION Prepaid Contractor AIRDRIE /find-if-business-is-licenced.cfm?faction=SearchDetails&BusID=25581
3817 ZSA LEGAL RECRUITMENT LIMITED d.b.a ZSA LEGAL RECRUITMENT LIMITED Employment Agency CALGARY /find-if-business-is-licenced.cfm?faction=SearchDetails&BusID=9367
3818 ZU HOUSE LTD. d.b.a ZU HOUSE LTD. Prepaid Contractor EDMONTON /find-if-business-is-licenced.cfm?faction=SearchDetails&BusID=43181
3819 ZYIA ACTIVE CANADA LIMITED d.b.a ZYIA ACTIVE CANADA LIMITED Direct Seller EDMONTON /find-if-business-is-licenced.cfm?faction=SearchDetails&BusID=44542
3820 ZZ CONSTRUCTION LTD. d.b.a ZZ CONSTRUCTION LTD. Prepaid Contractor CALGARY /find-if-business-is-licenced.cfm?faction=SearchDetails&BusID=33207
Answered By - Andrej Kesely
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.