Issue
import requests
from bs4 import BeautifulSoup
import csv
from itertools import zip_longest
# One accumulator list per CSV column, plus the detail-page links gathered
# from the search results.
job_title, company_name, location_name = [], [], []
job_skill, links, salary = [], [], []

# Download and parse the search results page.
result = requests.get("https://www.indeed.com/jobs?q=web+development&l=&from=searchOnHP")
source = result.content
soup = BeautifulSoup(source, "lxml")

# NOTE: the first filter below is a set literal ({"class", ...}), unlike the
# dict filters used for the other three lookups.
job_titles = soup.find_all("a", {"class", "jcs-JobTitle"})
company_names = soup.find_all("span", {"class": "companyName"})
location_names = soup.find_all("div", {"class": "companyLocation"})
job_skills = soup.find_all("div", {"class": "job-snippet"})

# The other three result lists are indexed by the position of each title,
# i.e. they are assumed to line up one-to-one with job_titles.
for i, title_tag in enumerate(job_titles):
    job_title.append(title_tag.text.strip())
    links.append("https://www.indeed.com" + title_tag.attrs["href"])
    company_name.append(company_names[i].text.strip())
    location_name.append(location_names[i].text.strip())
    job_skill.append(job_skills[i].text.strip())

# Visit every job's own page to read its salary.
for link in links:
    result = requests.get(link)
    source = result.content
    soup = BeautifulSoup(source, "lxml")
    salaries = soup.find("span", {"class": "icl-u-xs-mr--xs attribute_snippet"})
    # find() yields None when the page has no matching span; the attribute
    # access below is where the reported AttributeError is raised.
    salary.append(salaries.text)

# Transpose the column lists into rows; zip_longest pads shorter columns.
my_file = [job_title, company_name, location_name, job_skill, salary]
exported = zip_longest(*my_file)

header = ["Job titles", "Company names", "Location names", "Job skills", "salaries"]
with open("/Users/Rich/Desktop/testing/indeed.csv", "w") as myfile:
    writer = csv.writer(myfile)
    writer.writerow(header)
    writer.writerows(exported)
I was scraping this website to get the job titles, company names, location names, job skills, and salaries by scraping each job's page. It works when I print the result of the command: salaries = soup.find("span", {"class": "icl-u-xs-mr--xs attribute_snippet"}), but when I try to append only its text I get this error: AttributeError: 'NoneType' object has no attribute 'text'. Please help me get this fixed; it would be appreciated.
Solution
The main issue is that there is not always a salary, in which case find() returns None and accessing .text on it fails, so you have to handle this, e.g. with an if condition,
and you could also avoid all these separate lists.
salary.text if salary else None
Example
# Scrape one Indeed search results page and write one CSV row per job card.
import csv

import requests
from bs4 import BeautifulSoup

# Explicit column order, so the header can be written even when no job card
# is found (indexing data[0] would raise IndexError on an empty result).
fieldnames = ['title', 'company', 'location', 'skills', 'salary']

result = requests.get("https://www.indeed.com/jobs?q=web+development&l=&from=searchOnHP")
source = result.content
soup = BeautifulSoup(source, "lxml")

data = []
# Work card by card, so every field of a row comes from the same posting
# instead of relying on separately collected lists lining up.
for e in soup.select('ul.jobsearch-ResultsList .slider_item'):
    salary = e.find("div", {"class": "salary-snippet-container"})
    data.append({
        'title': e.find("a", {"class": "jcs-JobTitle"}).get_text(strip=True),
        'company': e.find("span", {"class": "companyName"}).get_text(strip=True),
        'location': e.find("div", {"class": "companyLocation"}).get_text(strip=True),
        'skills': e.find("div", {"class": "job-snippet"}).get_text(strip=True),
        # Not every card lists a salary; find() returns None in that case.
        'salary': salary.text if salary else None,
    })

# newline='' leaves line-ending handling to the csv module, which avoids
# blank rows between records on Windows.
with open('indeed.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)
Answered By - HedgeHog
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.