Issue
To analyze the prices of different products I have created a function that downloads them through the selenium library, however, when I execute the routine step by step it works fine, but when I try to encapsulate it in a function it saves only the last value.
this is my code
import random # Para generar números aleatorios
import pandas as pd
import numpy as np
from time import sleep # Para detener la ejecución de los programas durante un tiempo definido
from selenium import webdriver # Para ejecutar acciones a través de un navegador web
from selenium.webdriver.chrome.service import Service #Para que webdriver seleccione automáticamente el driver
from webdriver_manager.chrome import ChromeDriverManager # Para llamar el driver que permite controlar Google Chrome
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from types import ModuleType, FunctionType # Para la función que remueve los elementos del environment
def scraping_jumbo(url):
#Descarga la versión más actualizada del driver de Chrome
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
#Abre el navegador según la url especificada
driver.get(url)
#Detiene el algoritmo mientras carga la página entre 10 y 15 segundos
sleep(random.uniform(10.0, 15.0))
#Recorre el código html de la página para encontrar "shelf-item"
items = driver.find_elements(By.CLASS_NAME,"shelf-item")
#Crea diferentes arreglos vacíos a rellenar en el ciclo for
data = []
for item in items:
titulo=item.find_element(By.CLASS_NAME, "shelf-product-title-text").text
print(titulo)
marca=item.find_element(By.CLASS_NAME, "shelf-product-brand").text
try:
precio=item.find_element("xpath", './/span[@class="price-best"] | .//span[@class="product-sigle-price-wrapper"]').text
except NoSuchElementException:
np.nan
data.append({'titulo': titulo,
'marca': marca,
'precio': precio})
print(data)
df=pd.DataFrame(data)
return df
data_jumbo = scraping_jumbo("https://www.jumbo.cl/mascotas?page=1")
Could you help me?
Solution
Check the indent of the pd.DataFrame
line. Move it outside of the for loop.
Answered By - winderland
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.