Issue
import scrapy
from scrapy.http import Request
from bs4 import BeautifulSoup
class TestSpider(scrapy.Spider):
    """Spider that visits lawyer profile pages and prints selected details.

    Starting from the listing page in ``start_urls``, it follows the links
    found inside every ``div.panel-title`` element and, for each profile
    page, prints the last text fragment of the paragraphs labelled
    "Decizia de intrare:" and "Telefon:".
    """

    name = 'test'
    start_urls = ['https://www.baroul-bucuresti.ro/tablou-definitivi']
    page_number = 1

    def parse(self, response):
        """Yield one request per profile link found on the listing page.

        The first anchor inside each ``div.panel-title`` is skipped; every
        remaining anchor that carries an ``href`` is scheduled for
        ``parse_book``.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        for panel in soup.find_all('div', class_='panel-title'):
            for anchor in panel.find_all('a', href=True)[1:]:
                # urljoin resolves relative and absolute hrefs alike, whereas
                # plain string concatenation only works for "/path" hrefs.
                yield Request(
                    response.urljoin(anchor['href']),
                    callback=self.parse_book,
                )

    def parse_book(self, response):
        """Print the entry decision and the phone number of a profile page.

        Each ``<p>`` under ``div.av_bot_left.left`` whose markup contains one
        of the known labels has its text nodes stripped, and the last
        non-empty fragment is printed.
        """
        labels = ('Decizia de intrare:', 'Telefon:')
        for paragraph in response.xpath("//div[@class='av_bot_left left']//p"):
            markup = paragraph.get()
            if not any(label in markup for label in labels):
                continue
            fragments = [
                text.strip()
                for text in paragraph.xpath('.//text()').getall()
                if text.strip()
            ]
            # A labelled paragraph without visible text has nothing to
            # print; skip it instead of failing on an empty list.
            if fragments:
                print(fragments[-1])
This is my output. I want to replace the dots in the phone number with spaces and replace the leading 0 with +.
This is the page link: https://www.baroul-bucuresti.ro/avocat/15655/aanegroae-ana-maria
0752.172.817
I want the output to look like this:
+752 172 817
Solution
You can use the replace() function to change the dots to spaces. You also need to slice off the first character of the string to remove the leading zero:
# Drop the leading zero by slicing, then swap the dot separators for spaces.
raw_number = "0752.172.817"
digit_groups = raw_number[1:].split(".")
out = "+ " + " ".join(digit_groups)
print(out)
Note that you must not use replace() to remove the first zero: the phone number may contain other zeros, and every one of them would be replaced by a + character as well.
Output:
+ 752 172 817
Answered By - Cardstdani
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.