Issue
When I write the CSV file, the data gets overwritten. Is there a solution? The page I am scraping is https://www.aeafa.es/asociados.php?provinput=&_pagi_pg=1. I have already searched for an answer here and spent a long time on Google, but found nothing. I've already tried opening the file with 'w' instead of 'r' or 'a', but I still can't get my code to work:
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Asker's original script, with the indentation restored so that it parses.
# NOTE(review): the paste had lost all leading whitespace; the nesting below is
# reconstructed (temp and the CSV write inside the page loop) — confirm against
# the asker's real file.

# Browser-like User-Agent header sent with every request.
headers ={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
}
for page in range(1,3):
    r =requests.get('https://www.aeafa.es/asociados.php?provinput=&_pagi_pg={page}'.format(page=page),
                    headers=headers)
    soup=BeautifulSoup(r.content, 'lxml')
    # One matching <div> per listed entry on the page.
    tag=soup.find_all('div',class_='col-md-8 col-sm-8')
    # PROBLEM (left in place on purpose — this is what the question is about):
    # temp is re-created for every page, so rows from earlier pages are dropped.
    temp=[]
    for pro in tag:
        # Text of every <p> inside the entry; fields are picked by position.
        data=[tup.text for tup in pro.find_all('p')]
        # Each slice drops a fixed number of leading characters
        # (presumably the field's label text — verify against the page).
        Dirección=data[2]
        Dirección=Dirección[12:]
        Población=data[3]
        Población=Población[14:]
        Provincia=data[4]
        Provincia=Provincia[14:]
        Teléfono=data[5]
        # Remove the dots from the number and prefix it with "+".
        Teléfono="+" + Teléfono[11:].replace('.', "")
        Email=data[6]
        Email=Email[10:]
        temp.append([Dirección,Provincia,Población,Teléfono, Email])
    # PROBLEM: samp.csv is rewritten from scratch on every page, so the file
    # ends up holding only the rows of the last page.
    df=pd.DataFrame(temp,columns=["Dirección","Provincia","Población","Teléfono","Email"])
    df.to_csv('samp.csv')
Solution
Move the list temp outside of the for-loop, so that it is not reset on every page and keeps the rows from all pages.
Then create the DataFrame and write the CSV once, after the loop has finished:
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Scrape the paginated listing and save the rows of every page to samp.csv.

# Browser-like User-Agent header sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"
}

# Created once, *before* the page loop, so rows from every page accumulate
# here instead of being thrown away when the next page starts.
temp = []
for page in range(1, 3):
    r = requests.get(
        "https://www.aeafa.es/asociados.php?provinput=&_pagi_pg={page}".format(
            page=page
        ),
        headers=headers,
        # Without a timeout, requests can wait on a stalled server forever.
        timeout=30,
    )
    # Stop on an HTTP error instead of parsing an error page into zero rows.
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "lxml")
    # One matching <div> per listed entry on the page.
    tag = soup.find_all("div", class_="col-md-8 col-sm-8")
    for pro in tag:
        # Text of every <p> inside the entry; fields are picked by position.
        data = [tup.text for tup in pro.find_all("p")]
        # Each slice drops a fixed number of leading characters
        # (presumably the field's label text — verify against the page).
        Dirección = data[2][12:]
        Población = data[3][14:]
        Provincia = data[4][14:]
        # Remove the dots from the number and prefix it with "+".
        Teléfono = "+" + data[5][11:].replace(".", "")
        Email = data[6][10:]
        temp.append([Dirección, Provincia, Población, Teléfono, Email])

# Build the frame and write the file once, after all pages have been collected,
# so the CSV is not overwritten page by page.
df = pd.DataFrame(
    temp, columns=["Dirección", "Provincia", "Población", "Teléfono", "Email"]
)
df.to_csv("samp.csv")
print(len(df))
Prints:
98
Screenshot from LibreOffice:
Answered By - Andrej Kesely
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.