Issue
In the line that defines adress, I want to combine the results from 4 different tags into 1 value, but I cannot figure out how to do so.
`from bs4 import BeautifulSoup
import requests, openpyxl
#excel = openpyxl.Workbook()
#print(excel.sheetnames)
#sheet = excel.active
#sheet.title = 'KNX manufacturers'
#print(excel.sheetnames)
#sheet.append(['name', 'description', 'country', 'website', 'phone', 'CEO', 'adress'])
try:
    # Download the manufacturers page; raise_for_status() turns an HTTP
    # error status into an exception that the handler below prints.
    source = requests.get('https://www.knx.org/knx-en/for-professionals/community/manufacturers/')
    source.raise_for_status()
    soup = BeautifulSoup(source.text,'html.parser')
    # Every <li> inside the "accordion" div is treated as one company entry.
    companys = soup.find('div', class_="accordion").find_all('li')
    for company in companys:
        name = company.find('span', class_="desktop_only").text
        description = company.find('div', class_="col-md-6 col-sm-12 col-xs-12").text.strip()
        country = company.find('div', class_="col-lg-4 col-sm-6 col-xs-6 item").span.text
        website = company.find('div', class_="col-sm-6 col-xs-12").a.text
        # NOTE: str.strip('Phone: ') removes any of those characters from both
        # ends of the text; it does not remove the literal prefix "Phone: ".
        phone = company.find('div', class_="col-sm-6 col-xs-12").find_all('span')[5].text.strip('Phone: ')
        CEO = company.find('div', class_="col-sm-6 col-xs-12").find_all('strong')[0].text.strip()
        # NOTE(review): this is the line the question is about. `[1, 2, 3, 4]`
        # passes a tuple as the index; presumably the result of find_all()
        # only accepts a single integer or a slice, so this cannot return the
        # four spans at once -- confirm against the BeautifulSoup docs.
        adress = company.find('div', class_="col-sm-6 col-xs-12").find_all('span')[1, 2, 3, 4].text
        print(adress)
        #print(name, description, country, website, phone, CEO, adress)
        #sheet.append([name, description, country, website, phone, CEO, adress])
        # Stop after the first company (indentation reconstructed; `break`
        # must sit inside the loop to be valid).
        break
except Exception as e:
    # Any failure above (network, missing tag, bad index) is only printed.
    print(e)
#excel.save('KNX manufacturers | Bemsiq.xlsx')`
I tried adding find_all('span')[1, 2, 3, 4], but it can only show one of these results at a time.
Solution
To get all the data from the table into a pandas DataFrame, you can try:
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Page that lists all KNX manufacturers.
url = 'https://www.knx.org/knx-en/for-professionals/community/manufacturers/'
# Use a timeout so the script cannot hang forever on a stalled connection,
# and fail loudly on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
def get_text(tag):
    """Return the text of *tag*, trimmed, with whitespace runs collapsed.

    Leading and trailing whitespace is stripped, then every run of two or
    more whitespace characters is replaced by a single space.
    """
    return re.sub(r'\s{2,}', ' ', tag.text.strip())
# One tuple per manufacturer: (name, phone, web, email, person, address).
all_data = []

for a in soup.select('.accordion-body'):
    name = a.select_one('.mobile_only').text

    # Each contact field is located via its <strong> label; '-' marks a
    # field that is missing for this manufacturer.
    phone = a.select_one('strong:-soup-contains("Phone:")')
    phone = phone.find_next_sibling(string=True) if phone else '-'

    web = a.select_one('strong:-soup-contains("Website:") + a')
    web = web['href'] if web else '-'

    email = a.select_one('strong:-soup-contains("Email:") + a')
    email = email['href'] if email else '-'

    # The first <strong> holds the contact person; the three <span> siblings
    # that follow it are joined into one address string.
    person = a.strong
    line1 = person.find_next_sibling("span")
    line2 = line1.find_next_sibling("span")
    line3 = line2.find_next_sibling("span")
    address = f'{get_text(line1)}, {get_text(line2)}, {get_text(line3)}'
    person = person.text

    all_data.append((name, phone, web, email, person, address))

df = pd.DataFrame(all_data, columns=['Name', 'Phone', 'Web', 'Email', 'Person', 'Address'])
print(df.head(10).to_markdown(index=False))
Prints:
Name | Phone | Web | Email | Person | Address |
---|---|---|---|---|---|
[mn]medianet | +49.6103697784 | https://www.medianet-home.de/ | [email protected] | Bernhard Hnida | Am Taubhaus 29, 63303 Dreieich, Germany |
1Home Solutions GmbH | +386.51300606 | http://www.1home.io | [email protected] | Dejan Bukovnik | Wattstrasse 11, 13355 Berlin, Germany |
3domotic Global Systems, S.L. | +34.610991993 | - | [email protected] | Santiago Ribas Roca | Enric Granados 153, Pral Bis - 1, 08008 Barcelona, Spain |
4ba GmbH | +49.6461980440 | http://www.4ba-gmbh.de | [email protected] | Oliver Herrmann | Am Roten Stein 9A, 35216 Biedenkopf, Germany |
ABB France | +33.561151845 | http://www.abb.com | [email protected] | Serge Le Men | rue Paul Gauguin 33, 31100 Toulouse, France |
ABB LV Installation Materials Company Limited, Beijing | +86.1058085086 | http://www.abb.com.cn | [email protected] | Yuwei Dai | Kangding Street No. 17, 100176 Beijing, China |
ABB S.p.A. | +39.35395269 | http://www.abb.com/it | [email protected] | Marco Simonella | Via delle Industrie 18, 20010 Vittuone, Italy |
ABB Schweiz Ltd. | +41 58/5864566 | http://www.levyfils.ch | [email protected] | Tudor Baiatu | Fulachstrasse 150, 8201 Schaffhausen, Switzerland |
ABB Stotz-Kontakt GmbH | +49.62217011357 | http://www.abb.com/knx | [email protected] | Volker Biewendt | Eppelheimer Straße 82, 69123 Heidelberg, Germany |
ABB Xiamen Smart Technology Co., Ltd. | +86.5927616016 | https://new.abb.com/ | [email protected] | Sylvia Wang | FangShanXiEr Road No.881, 361000 Xiamen, China |
Answered By - Andrej Kesely
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.