Issue
I don't get any errors from the script below, but it returns no data. I am trying to get all of the games for each week, which start in the fourth table in the HTML. When I enter the XPath expressions in the scrapy shell I get data, but once I put them in the parse method nothing is returned.
import scrapy


class NFLOddsSpider(scrapy.Spider):
    name = 'NFLOdds'
    allowed_domains = ['www.sportsoddshistory.com']
    start_urls = ['https://www.sportsoddshistory.com/nfl-game-season/?y=2022']

    def parse(self, response):
        for row in response.xpath('//table[@class="soh1"]//tbody/tr'):
            day = row.xpath('td[1]//text()').extract_first()
            date = row.xpath('td[2]//text()').extract_first()
            time = row.xpath('td[3]//text()').extract_first()
            AtFav = row.xpath('td[4]//text()').extract_first()
            favorite = row.xpath('td[5]//text()').extract_first()
            score = row.xpath('td[6]//text()').extract_first()
            spread = row.xpath('td[7]//text()').extract_first()
            AtDog = row.xpath('td[8]//text()').extract_first()
            underdog = row.xpath('td[9]//text()').extract_first()
            OvUn = row.xpath('td[10]//text()').extract_first()
            notes = row.xpath('td[11]//text()').extract_first()
            week = row.xpath('//*[@id="content"]/div/table[4]/tbody/tr/td/h3').extract_first()

            oddsTable = {
                'day': day,
                'date': date,
                'time': time,
                'AtFav': AtFav,
                'favorite': favorite,
                'score': score,
                'spread': spread,
                'AtDog': AtDog,
                'underdog': underdog,
                'OvUn': OvUn,
                'notes': notes,
                'week': week
            }

            yield oddsTable
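A quick way to compare what the shell sees against what the spider's selectors match is to count the nodes directly (a hypothetical session; the counts depend on the live page):

scrapy shell 'https://www.sportsoddshistory.com/nfl-game-season/?y=2022'
>>> # How many tables carry the class the spider looks for?
>>> len(response.xpath('//table[@class="soh1"]'))
>>> # How many rows does the spider's row selector actually match?
>>> len(response.xpath('//table[@class="soh1"]//tbody/tr'))
>>> # The headings that sit above each schedule table
>>> response.xpath('//h3/text()').getall()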
Solution
Updating the answer to include the Playoffs table; code below. To execute:

scrapy runspider NFLOddsSpider.py -O output.csv
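The core pattern in the spider below is: walk every h3 heading, pull the year and week out of the heading text with a regex (the := assignment expression requires Python 3.8+), then grab the first table that follows the heading with following-sibling::table[1]. Here is a minimal standalone sketch of that pattern, using parsel (the selector library underneath Scrapy) against a made-up HTML fragment rather than the real page:

import re
from parsel import Selector

# Tiny stand-in for the real page layout: an <h3> heading followed by its table.
html = """
<h3>2022 Regular Season - Week 1</h3>
<table><tbody>
  <tr><td>Thu</td><td>Sep 8, 2022</td></tr>
</tbody></table>
"""

sel = Selector(text=html)
for h3 in sel.xpath('//h3'):
    heading = h3.xpath('./text()').get().strip()
    if (m := re.match(r'(\d{4}) Regular Season - Week (\d{1,2})', heading)):
        year, week = m.group(1), m.group(2)
        # Only the table that immediately follows this particular heading
        table = h3.xpath('./following-sibling::table[1]')
        rows = table.xpath('./tbody/tr')
        print(year, week, len(rows))  # prints: 2022 1 1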
import re
import scrapy


class NFLOddsSpider(scrapy.Spider):
    name = 'NFLOdds'
    allowed_domains = ['www.sportsoddshistory.com']
    start_urls = ['https://www.sportsoddshistory.com/nfl-game-season/?y=2022']

    def parse(self, response):
        # Find all H3 tags
        for h3 in response.xpath('//h3'):
            h3_text = h3.xpath('./text()').get().strip()
            # Find headings for Week/Playoffs tables
            if (m := re.match(r'(\d{4}) Regular Season - Week (\d{1,2})', h3_text)):
                # Week
                year = m.group(1)
                week = m.group(2)
                # Pick out the one TABLE that immediately follows the H3 header
                for tab in h3.xpath('.//following-sibling::table[1]'):
                    # Pick out the rows from that table
                    for row in tab.xpath('./tbody[1]/tr'):
                        yield {
                            'year': year,
                            'week': week,
                            'round': None,
                            'day': row.xpath('td[1]//text()').get(),
                            'date': row.xpath('td[2]//text()').get(),
                            'time_et': row.xpath('td[3]//text()').get(),
                            'fav_at': row.xpath('td[4]//text()').get(),
                            'favorite': row.xpath('td[5]//text()').get(),
                            'fav_cover_spread': bool(row.xpath('td[5]//a/b/text()').get()),
                            'score': row.xpath('td[6]//text()').get(),
                            'spread': row.xpath('td[7]//text()').get(),
                            'und_at': row.xpath('td[8]//text()').get(),
                            'underdog': row.xpath('td[9]//text()').get(),
                            'und_cover_spread': bool(row.xpath('td[9]//a/b/text()').get()),
                            'over_under': row.xpath('td[10]//text()').get(),
                            'notes': row.xpath('td[11]//text()').get()
                        }
            elif (m := re.match(r'(\d{4}) Playoffs', h3_text)):
                # Playoffs
                year = m.group(1)
                week = None
                # Pick out the one TABLE that immediately follows the H3 header
                for tab in h3.xpath('.//following-sibling::table[1]'):
                    # Pick out the rows from that table
                    for row in tab.xpath('./tbody[1]/tr'):
                        yield {
                            'year': year,
                            'week': week,
                            'round': row.xpath('td[1]//text()').get(),
                            'day': row.xpath('td[2]//text()').get(),
                            'date': row.xpath('td[3]//text()').get(),
                            'time_et': row.xpath('td[4]//text()').get(),
                            'fav_at': row.xpath('td[5]//text()').get(),
                            'favorite': row.xpath('td[6]//text()').get(),
                            'fav_cover_spread': bool(row.xpath('td[6]//a/b/text()').get()),
                            'score': row.xpath('td[7]//text()').get(),
                            'spread': row.xpath('td[8]//text()').get(),
                            'und_at': row.xpath('td[9]//text()').get(),
                            'underdog': row.xpath('td[10]//text()').get(),
                            'und_cover_spread': bool(row.xpath('td[10]//a/b/text()').get()),
                            'over_under': row.xpath('td[11]//text()').get(),
                            'notes': None
                        }
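Once the run finishes, a quick way to sanity-check output.csv (just a sketch, assuming pandas is installed; the column names come from the dict keys yielded above):

import pandas as pd

df = pd.read_csv('output.csv')
# A few key columns from the regular-season and playoff rows
print(df[['year', 'week', 'round', 'favorite', 'underdog', 'spread', 'over_under']].head())
# Regular-season rows carry a week number; playoff rows leave week empty and fill round instead
print(df['week'].value_counts(dropna=False))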
Answered By - Aleksandr Krymskiy