Issue
I'm new to web scraping, and I want to scrape the information for all the events listed on a website: Events in Oslo.
I've written some sample code to scrape the data, which goes as follows:
'''A Python script to scrape data from 10times.com'''
import scrapy
import requests
class EventFinder(scrapy.Spider):
    '''Spider that scrapes the events on the 10times.com listing page for Oslo.

    Only the events present in the initial HTML of the listing page are
    followed; ``parse`` issues no request for further listing pages.
    '''
    name = 'EventSpider'  # name of the spider
    start_urls = ['https://10times.com/oslo-no?datefrom=2020-08-01&dateto=2021-07-31']
    custom_settings = {
        # All the scraped data will be stored in event_details.csv under the tmp folder
        'FEED_URI': 'tmp/event_details.csv'
    }

    def parse(self, response):
        '''Follow the link of every event found on the listing page.

        Yields one request per event link; each event page is handled by
        ``parse_links``.
        '''
        event_url = response.css(".mb-0 .text-decoration-none::attr(href)")
        for link in event_url:
            yield response.follow(link.get(), callback=self.parse_links)

    def parse_links(self, response):
        '''Scrape the details of a single event page and yield them as a dict.'''
        event_name = response.css("h1::text").extract()
        event_date = response.css(".mb-0 span::text").extract()
        event_timings = response.css("#hvrout1 td:nth-child(1)::text").extract()
        event_location = response.css("#map_dirr span , #map_dirr h3").css("::text").extract()
        event_type = response.css("#hvrout2::text").extract()
        event_tags = response.css("#hvrout2 a::text").extract()
        # zip() stops at the shortest list, so nothing is yielded for a page
        # on which any one of the selectors above matches no text.
        for item in zip(event_name, event_date, event_timings, event_location, event_type, event_tags):
            scraped_info = {
                'Event Name': item[0],
                'Date': item[1],
                'Timings': item[2],
                'Location': item[3],
                'Event Type': item[4],
                'Event Tags': item[5],
            }
            yield scraped_info
The code I've written is able to scrape the data for all the events that are listed on the first page, but as we scroll down, the page loads more data dynamically via Ajax GET requests, and my code is not able to scrape that data. I've watched some videos and read some articles too, but I was not able to figure out how to scrape data that is generated dynamically on scrolling. Any help on this will be appreciated.
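Solution
The events that appear on scrolling are served by an Ajax endpoint that takes a page parameter, so the spider below requests that endpoint directly instead of the HTML listing page: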
'''A Python script to scrape data from 10times.com'''
import scrapy
import requests
class EventFinder(scrapy.Spider):
    '''Spider that scrapes Oslo events from 10times.com via its Ajax endpoint.

    The listing is fetched page by page from the endpoint in ``url``. Each
    listing page yields one request per event, and the next listing page is
    requested for as long as the current one still contains event links.
    '''
    name = 'EventSpider'  # name of the spider
    # The endpoint takes the page number as its last query parameter, so the
    # URL of any listing page is simply ``url + str(page)``.
    url = 'https://10times.com/ajax?for=scroll&path=/oslo-no&datefrom=2020-08-01&dateto=2021-07-31&ajax=1&page='
    page = 1
    start_urls = [url + str(page)]
    custom_settings = {
        # All the scraped data will be stored in event_details.csv under the tmp folder
        'FEED_URI': 'tmp/event_details.csv',
        # Without an explicit format the feed is written as JSON lines,
        # whatever the file extension says.
        'FEED_FORMAT': 'csv',
    }

    def parse(self, response):
        '''Follow every event link on a listing page, then request the next page.

        Yields one request per event link (handled by ``parse_links``) and,
        while the page still lists events, one request for the following
        listing page (handled by this method again).
        '''
        event_links = response.css(".mb-0 .text-decoration-none::attr(href)").getall()
        for link in event_links:
            yield response.follow(link, callback=self.parse_links)

        # Stop paginating on the first page without event links; this assumes
        # the endpoint answers with an event-less page once the listing is
        # exhausted -- verify against the live site.
        if event_links:
            # Listing pages are requested one after another, so a single
            # counter on the spider is enough to track the current page.
            self.page += 1
            yield scrapy.Request(self.url + str(self.page), callback=self.parse)

    def parse_links(self, response):
        '''Scrape the details of a single event page and yield them as a dict.'''
        event_name = response.css("h1::text").extract()
        event_date = response.css(".mb-0 span::text").extract()
        event_timings = response.css("#hvrout1 td:nth-child(1)::text").extract()
        event_location = response.css("#map_dirr span , #map_dirr h3").css("::text").extract()
        event_type = response.css("#hvrout2::text").extract()
        event_tags = response.css("#hvrout2 a::text").extract()
        # zip() stops at the shortest list, so nothing is yielded for a page
        # on which any one of the selectors above matches no text.
        for item in zip(event_name, event_date, event_timings, event_location, event_type, event_tags):
            scraped_info = {
                'Event Name': item[0],
                'Date': item[1],
                'Timings': item[2],
                'Location': item[3],
                'Event Type': item[4],
                'Event Tags': item[5],
            }
            yield scraped_info
Output:
{'Event Name': 'Nasjonale Konferanse Om Hjerneslag', 'Date': '18 - 19 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Kongressenter Folkets Hus AS', 'Event Type': ' Trade Show', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'Education Fair in Oslo', 'Date': '17 - 18 Feb 2021', 'Timings': ' 10:00 AM - 07:00 PM (General)\n ', 'Location': '\n Oslo Spektrum', 'Event Type': ' Trade Show', 'Event Tags': 'Education & Training'}
{'Event Name': 'EAAE Deans Summit', 'Date': '22 - 23 Apr 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'NAFEMS Physics Based Digital Twins', 'Date': '23 - 24 Mar 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': 'Oslo Life Science Conference', 'Date': '15 - 18 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'Science & Research'}
{'Event Name': 'European Academy of Paediatric Dentistry Interim seminar', 'Date': '23 - 24 Apr 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Kongressenter Folkets Hus AS', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'GLOBVAC Conference', 'Date': '20 - 21 Apr 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Clarion Hotel The Hub', 'Event Type': ' Conference', 'Event Tags': 'Wellness, Health & Fitness'}
{'Event Name': 'European Conference on Community Psychology', 'Date': '03 - 04 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Red Cross Conference Center', 'Event Type': ' Conference', 'Event Tags': 'Wellness, Health & Fitness'}
{'Event Name': 'Baltic Nordic Acoustics Meeting', 'Date': '03 - 05 May 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Clarion Hotel Oslo', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'The European Port House Conference', 'Date': '27 - 28 May 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Petroleum Systems Conference', 'Date': '02 - 03 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Human Factors AS', 'Event Type': ' Conference', 'Event Tags': 'Power & Energy'}
{'Event Name': 'Oslo Yoga Festival', 'Date': '29 - 31 Jan 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Sagene samfunnshus', 'Event Type': ' Trade Show', 'Event Tags': 'Wellness, Health & Fitness'}
{'Event Name': 'NUGA Conference', 'Date': '28 - 30 Jan 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Clarion Hotel The Hub', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'Anti-Corruption Nordics', 'Date': '26 - 28 Jan 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'Software', 'Date': '10 - 11 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'International Joint Conference on Metallurgical and Materials Engineering', 'Date': '18 - 20 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': 'International Conference on Frontiers of Chemical Materials and Process', 'Date': '18 - 20 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Science & Research'}
{'Event Name': 'International Conference on Material Engineering and Advanced Manufacturing Technology', 'Date': '18 - 20 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': '600Minutes Executive IT', 'Date': ' 02 Dec 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Science & Research'}
{'Event Name': "IDC's Multicloud Conference", 'Date': ' 18 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Gamle Logen - Selskapslokaler Oslo', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'European Intelligence and Security Informatics Conference', 'Date': '10 - 11 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'Security & Defense'}
{'Event Name': 'Digitalization of Automation Systems', 'Date': '25 - 26 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Meet Ullevaal', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': 'Annual Privacy Forum', 'Date': '17 - 18 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'International Association of Lighting Designers Enlighten Europe', 'Date': '18 - 20 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Radisson Blu Scandinavia Hotel, Oslo', 'Event Type': ' Conference', 'Event Tags': 'Building & Construction'}
{'Event Name': 'Tedx Oslo', 'Date': ' 12 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Oslo World Music Festival', 'Date': '27 Oct - 01 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Visitor Centre', 'Event Type': ' Trade Show', 'Event Tags': 'Entertainment & Media'}
{'Event Name': 'Nordic Educational Meeting', 'Date': '10 - 11 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Radisson Blu Scandinavia Hotel, Oslo', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'Nordic Place Branding Conference', 'Date': ' 26 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Rådhuset', 'Event Type': ' Conference', 'Event Tags': 'Banking & Finance'}
{'Event Name': 'Specsavers Clinical Conference', 'Date': ' 13 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Radisson Blu Scandinavia Hotel, Oslo', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'IDC Future of Work conference', 'Date': ' 28 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Gamle Logen - Selskapslokaler Oslo', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'CMO Executive Forum NO', 'Date': ' 27 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Ekebergrestauranten', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': "EARMA's Annual Conference", 'Date': '29 Sep - 01 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Kongressenter Folkets Hus AS', 'Event Type': ' Conference', 'Event Tags': 'Banking & Finance'}
{'Event Name': 'EOCCS Learning Community Symposium', 'Date': '24 - 25 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'BI Norwegian Business School', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'CHFR Symposium', 'Date': '23 - 25 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Scandic Holmenkollen Park', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'Nordic and Baltic Stata Conference', 'Date': ' 24 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Cancer Registry Norway', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': '600Minutes CFO', 'Date': ' 13 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Clarion Hotel Oslo', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Healthy Buildings Europe', 'Date': '21 - 23 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Inland Norway University of Applied Sciences', 'Event Type': ' Conference', 'Event Tags': 'Building & Construction'}
{'Event Name': 'Access MBA Tour Oslo', 'Date': ' 24 Sep 2020', 'Timings': ' 04:30 PM - 09:30 PM', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Oslo Urban Arena', 'Date': '10 - 11 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Samfunnssalen Event & Konferanse', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'World Congress on Cancer', 'Date': '14 - 16 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Soria Moria hotell og konferansesenter', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'International Conference on Defects in Semiconductors', 'Date': '26 - 30 Jul 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'Electric & Electronics'}
{'Event Name': 'International Conference on Ict Systems Security and Privacy Protection', 'Date': '22 - 24 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'International Conference on Intelligent Information Systems', 'Date': '17 - 18 Jul 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Scandic KNA Hotel', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
Answered By - SuperUser
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.