Issue
Hello, I want to put several URLs in a text file and read them one by one with the code below. I want to open the URLs from a text file and run the script to extract data from, for example, 10 links instead of only 1. How should the code be written? Thank you so much for your help.
import requests
from bs4 import BeautifulSoup
import csv
# Scrape today's owner-posted car listings from Craigslist Denver and
# write each listing's URL to a CSV-style text file, one URL per row.
final_data = []

url = "https://denver.craigslist.org/search/cto?purveyor-input=owner&postedToday=1"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")

# Each search result is a `result-row` element; the `hdrlnk` anchor
# inside it carries the listing's URL in its href attribute.
for details in soup.find_all(class_="result-row"):
    for link in details.find_all(class_="hdrlnk"):
        # csv.writer expects each row to be a sequence, so wrap the URL.
        final_data.append([link.get("href")])

print(final_data)

filename = "link.txt"
# newline="" is the csv-module convention: it prevents blank lines on Windows.
with open(filename, "w", newline="") as csv_file:
    writer = csv.writer(csv_file, delimiter=",")
    # The original `writerow("")` iterated the empty string and emitted a
    # blank row with no cells; write an explicit header row instead.
    writer.writerow(["link"])
    # writerows replaces the manual `for i in range(len(final_data))` loop.
    writer.writerows(final_data)
Solution
If you have every URL on a new line, then simply open the file, read all the text, and split it on '\n' to get a list of lines (without the trailing '\n' characters):
# Read the whole file once, then split on newline to get a list of URLs.
# (The original snippet was missing the colon after the `with` statement.)
with open('input.txt') as fh:
    text = fh.read()
    all_links = text.split('\n')
or shorter
# Shorter version: read and split in a single expression.
# (The original snippet was missing the colon after the `with` statement.)
with open('input.txt') as fh:
    all_links = fh.read().split('\n')
And later you have to use a for-loop to run the code for all URLs:
# - before loop -
final_data = []

# - loop -
for url in all_links:
    # ... code ...
    pass  # placeholder so the skeleton is valid Python until you paste your code

# - after loop -
print(final_data)
# ... write in csv ...
EDIT:
import requests
from bs4 import BeautifulSoup
import csv
# - before loop -
#all_links = [
#    "https://denver.craigslist.org/search/cto?purveyor-input=owner&postedToday=1",
#]

# Keep one URL per line, skipping blank lines: a trailing newline in the
# file would otherwise produce an empty string and requests.get("") raises.
with open('input.txt') as fh:
    all_links = [line.strip() for line in fh if line.strip()]

final_data = []

# - loop -
for url in all_links:
    print('url:', url)
    response = requests.get(url)
    #print('[DEBUG] code:', response.status_code)

    soup = BeautifulSoup(response.text, "html.parser")

    for row in soup.find_all(class_="result-row"):
        # Use a distinct name here: the original reassigned `all_links`
        # inside the loop, clobbering the list of URLs being iterated.
        for link in row.find_all(class_="hdrlnk"):
            href = link.get("href")
            final_data.append([href])
            print(' >', href)

    print('----------')

# - after loop -
#print(final_data)

filename = "output.csv"  # no need to add `./`
# newline="" prevents the csv module from emitting blank lines on Windows.
with open(filename, "w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=",")
    csv_writer.writerow(["links"])
    csv_writer.writerows(final_data)  # with `s` at the end
Answered By - furas
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.