Wednesday, April 6, 2022

[FIXED] Parse second/third page and add to list with BeautifulSoup

April 06, 2022 beautifulsoup, django, python No comments

Issue

I am trying to scrape a website for recipes and then present a page with a random pick. For this I have written a piece of code that works perfectly when I fetch only the first page (35 recipes). However, I also want to grab the recipes from the second and third pages. I figured I should write a loop for this, but I can't seem to get it right. What did I do wrong in this code?

from django.shortcuts import render
import requests
import re
from bs4 import BeautifulSoup
import random

# Create your views here.
def recipe(request):
    """Scrape Allerhande recipe cards and render one picked at random.

    Fetches the recipe search page, extracts link, title, picture and
    preparation time for each recipe card, and renders
    ``randomRecipe/recipe.html`` with a single randomly indexed entry.

    Args:
        request: The incoming request object, passed through to ``render``.

    Returns:
        Whatever ``render`` returns for the ``randomRecipe/recipe.html``
        template and the built context.
    """

    # Fetch and parse the search results. The loop runs for page = 0 and
    # page = 1 and stops as soon as page reaches 2.
    page = 0
    while page != 2:
        webpage_response = requests.get("https://www.ah.nl/allerhande/recepten-zoeken?page=" + str(page))
        webpage = webpage_response.content
        soup = BeautifulSoup(webpage, "html.parser")  
        # Recipe cards are located by class-name prefix (regex match on the class attribute).
        recipe_links = soup.find_all('a', attrs={'class' : re.compile('^display-card_root__.*')})
        recipe_pictures = soup.find_all('img', attrs={'class' : re.compile('^card-image-set_imageSet__.*')})
        # Text of the first <li> in each properties list.
        # NOTE(review): this name is reassigned on every pass of the loop, so
        # after the loop it holds only the values from the last page fetched.
        recipe_prep_time = [ul.find('li').text 
                   for ul in soup.find_all('ul',
                        attrs={'class': re.compile('^recipe-card-properties_root')})]


        # Set up lists.
        # NOTE(review): these are created inside the while loop, so each pass
        # starts from empty lists and entries from earlier pages are discarded.
        links = []
        titles = []
        pictures = []

        # Prefix prepended to each href to build the stored link.
        prefix = "https://ah.nl"

        # Collect link, title and picture source set for every card on this page.
        for link in recipe_links:
            links.append(prefix + link.get('href'))

        for title in recipe_links:
            titles.append(title.get('aria-label'))

        for img in recipe_pictures:
            pictures.append(img.get('data-srcset'))

        page = page +1

    # Pick a random index. random.randint includes both endpoints, so this
    # yields a value from 0 to 105.
    # NOTE(review): the lists only hold the entries of the last page fetched,
    # so an index beyond their length raises IndexError below.
    nummer = random.randint(0,105)
    print(nummer)

    # Split the chosen data-srcset value on spaces.
    pic_url = pictures[nummer].split(' ')

    # Build the template context. pic_url[16] is the 17th space-separated
    # token of the srcset; presumably the URL of one particular image size
    # -- TODO confirm against the page markup.
    context = {
        "titles" : titles[nummer],
        "pictures" : pic_url[16],
        "preptime" : recipe_prep_time[nummer],
        "link" : links[nummer]
    }

    # Render the page.
    return render(request, "randomRecipe/recipe.html", context)

Solution

You can implement the pagination with a `for` loop over the page numbers:

from django.shortcuts import render
import requests
import re
from bs4 import BeautifulSoup
import random

# Create your views here.
def recipe(request):
    """Render a page showing one randomly chosen Allerhande recipe.

    Scrapes result pages 1 to 3 of the recipe search, collects link, title,
    picture URL and preparation time for every recipe card found on any of
    those pages, and renders ``randomRecipe/recipe.html`` with one entry
    picked at random.

    Args:
        request: The incoming Django request, passed through to ``render``.

    Returns:
        The rendered template response, or a plain 503 response when no
        recipe could be scraped from any page.
    """
    # Local import keeps this block self-contained; Django is already a
    # dependency of this module (see ``render``).
    from django.http import HttpResponse

    prefix = "https://ah.nl"

    # Compile the class-name patterns once instead of once per page.
    card_class = re.compile('^display-card_root__.*')
    image_class = re.compile('^card-image-set_imageSet__.*')
    props_class = re.compile('^recipe-card-properties_root')

    # Created BEFORE the page loop so that every page adds to the result
    # instead of replacing what earlier pages contributed.
    recipes = []

    for page in range(1, 4):
        # A timeout prevents the view from hanging forever on a stalled
        # connection.
        webpage_response = requests.get(
            f"https://www.ah.nl/allerhande/recepten-zoeken?page={page}",
            timeout=10,
        )
        if not webpage_response.ok:
            # Skip error pages rather than parsing them as recipe listings.
            continue

        soup = BeautifulSoup(webpage_response.content, "html.parser")
        anchors = soup.find_all('a', attrs={'class': card_class})
        images = soup.find_all('img', attrs={'class': image_class})
        prop_lists = soup.find_all('ul', attrs={'class': props_class})

        # Assumes the three result lists follow the same card order, as the
        # original single-index lookup did -- TODO confirm against the
        # markup. zip() stops at the shortest list, so every stored recipe
        # has all four fields.
        for anchor, image, props in zip(anchors, images, prop_lists):
            first_prop = props.find('li')

            # Index 16 is the srcset token the original code selected; fall
            # back to the first token (presumably the first image URL) when
            # the srcset is shorter or missing.
            srcset_parts = (image.get('data-srcset') or '').split(' ')
            if len(srcset_parts) > 16:
                picture = srcset_parts[16]
            else:
                picture = srcset_parts[0]

            recipes.append({
                "titles": anchor.get('aria-label'),
                "pictures": picture,
                "preptime": first_prop.text if first_prop is not None else "",
                "link": prefix + (anchor.get('href') or ''),
            })

    if not recipes:
        # Nothing scraped (site down, markup changed, ...): report it
        # explicitly instead of failing with an IndexError.
        return HttpResponse("No recipes could be retrieved.", status=503)

    # random.choice draws from whatever was actually collected, so no
    # hard-coded upper bound can run past the end of the data.
    context = random.choice(recipes)

    return render(request, "randomRecipe/recipe.html", context)

Answered By - F.Hoque

This answer was collected from Stack Overflow and tested by the PythonFixing community admins. It is licensed under CC BY-SA 2.5, CC BY-SA 3.0 and CC BY-SA 4.0.

Wednesday, April 6, 2022

[FIXED] Parse second/third page and add to list with BeautifulSoup

Issue

Solution

0 comments:

Post a Comment

Popular Posts

Labels