Issue
I want to store the page URL in a variable named "url" so that I can save it to a CSV file (to open in Excel), but I get "UnboundLocalError: local variable 'url' referenced before assignment".
# Scrapy spider named "articles", reproduced as posted in the question
# (the paste lost its indentation, so it is not runnable as shown).
class NewsSpider(scrapy.Spider): name = "articles"
# Prompt for an article URL on stdin and yield a request for it; the response
# is handed to parse_dir_contents.
def start_requests(self):
url = input("Enter the article url: ")
yield scrapy.Request(url, callback=self.parse_dir_contents)
# Callback for the request above: yields the scraped fields as an item and
# appends one row to a CSV file.
def parse_dir_contents(self, response):
# BUG (the error this question is about): assigning to 'url' makes it a local
# name of this method, so reading it on the right-hand side raises
# UnboundLocalError. The 'url' in start_requests is a separate local variable
# and is not visible here; the response object carries the URL as response.url.
url = url
# Category, Headlines, Author, etc. are not defined anywhere in this excerpt --
# presumably they are extracted from `response` by code omitted from the post.
yield{
'Category':Category,
'Headlines':Headlines,
'Author': Author,
'Source': Source,
'Publication Date': Published_Date,
'Feature_Image': Feature_Image,
'Skift Take': skift_take,
'Article Content': Content
}
# =============== Data Store +++++++++++++++++++++
# One CSV row per article. Unlike the yielded item, the row includes the URL
# and omits skift_take.
Data = [[Category,Headlines,Author,Source,Published_Date,Feature_Image,Content,url]]
# First attempt: append the row through an open file handle, without a header.
try:
df = pd.DataFrame (Data, columns = ['Category','Headlines','Author','Source','Published_Date','Feature_Image','Content','URL'])
print(df)
with open('C:/Users/Public/pagedata.csv', 'a') as f:
df.to_csv(f, header=False)
# NOTE(review): the bare except catches every exception from the block above.
# The fallback rebuilds the same frame and appends by path; header=False is not
# passed here, so pandas' default header row is written as well.
except:
df = pd.DataFrame (Data, columns = ['Category','Headlines','Author','Source','Published_Date','Feature_Image','Content','URL'])
print(df)
df.to_csv('C:/Users/Public/pagedata.csv', mode='a')
Solution
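Inside the callback, the URL of the page is available as response.url, so you do not need the "url" variable from start_requests at all.
Replace the line "url = url" with: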
# Goes inside parse_dir_contents, in place of the failing assignment:
# the URL is read from the response object passed to the callback.
url = response.url
# OR: skip the local variable and put response.url straight into the yielded item
def parse_dir_contents(self, response):
    """Yield the scraped article fields together with the page URL.

    The URL is read from ``response.url``, so no ``url`` variable has to be
    carried over from ``start_requests``.

    NOTE: ``Category``, ``Headlines``, ``Author``, ``Source``,
    ``Published_Date``, ``Feature_Image``, ``skift_take`` and ``Content`` are
    not defined in this excerpt; presumably they are extracted from
    ``response`` by code omitted from the post.

    Args:
        response: The response object passed to this callback; only its
            ``url`` attribute is read here.

    Yields:
        A dict with one entry per scraped field plus ``'url'``.
    """
    yield {
        'Category': Category,
        'Headlines': Headlines,
        'Author': Author,
        'Source': Source,
        'Publication Date': Published_Date,
        'Feature_Image': Feature_Image,
        'Skift Take': skift_take,
        'Article Content': Content,
        'url': response.url,
    }
Answered By - Fazlul
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.