Issue
I found some code online and I am trying to run it, but I am running into problems. If someone could help me, that would be great. Thank you.
from concurrent.futures.thread import ThreadPoolExecutor
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import time
import random
import string
import re
chromeOptions = Options()
chromeOptions.headless = True # how Chrome is launched: in the background (headless) or not
executor = ThreadPoolExecutor(20) # number of concurrent threads
# Builds a string of `length` characters, each picked at random from the lowercase ASCII letters.
def generate_random_string(length):
letters = string.ascii_lowercase
rand_string = ''.join(random.choice(letters) for i in range(length))
return rand_string
# simplest possible function that extracts all links from the given page
def getlinks(url):
driver = webdriver.Chrome(executable_path="the path", options=chromeOptions) # path to chromedriver
list = []
driver.get(url)
a = driver.find_elements_by_xpath('.//a')
i = 0
for b in a:
i = i+1
link = b.get_attribute("href")
list.insert(i, link)
driver.quit()
return list
# Submits two scraper jobs to the thread pool: one for `url` and one for a randomly generated path.
def scrape(url):
executor.submit(scraper, url)
executor.submit(scraper, a link/+generate_random_string(10))
# generate junk links if needed. by the way, if the site under test has a search function or any other pages with heavy DB queries, this option is the one for you
# Opens `url` in a new Chrome instance, waits 15 seconds, then closes the browser.
def scraper(url):
driver = webdriver.Chrome(executable_path="the path", options=chromeOptions) # path to chromedriver
driver.get(url)
time.sleep(15)
driver.quit()
# Collect the links from the start page, then call scrape() on every link, with the list repeated 10 times.
urls = getlinks("a link")
for url in urls * 10: # number of instances
scrape(url)
Error:
Traceback (most recent call last):
File "D:\PyCharm Community Edition 2021.2.1\plugins\python-ce\helpers\pydev\pydevd.py", line 1483, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "D:\PyCharm Community Edition 2021.2.1\plugins\python-ce\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/User/PycharmProjects/project/main.py", line 15
letters = string.ascii_lowercase
^
IndentationError: expected an indented block
Wherever "a link" appears in the code, it stands for the target URL, and wherever "the path" appears, it stands for the path to chromedriver.exe.
Solution
Good day!
Python uses indentation to separate code blocks. Each time a new scope is entered, the Python interpreter expects an indented code block. This is how you should reformat your code. (Just to be sure, I want to add that a proper Selenium setup is required to run this code.)
from concurrent.futures.thread import ThreadPoolExecutor
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import time
import random
import string
import re
# Chrome options shared by every browser instance; headless means Chrome
# runs in the background instead of opening a visible window.
chromeOptions = Options()
chromeOptions.headless = True

# Thread pool used for the page loads; 20 is the number of concurrent threads.
executor = ThreadPoolExecutor(max_workers=20)
def generate_random_string(length):
    """Return a random string of lowercase ASCII letters.

    Args:
        length: Number of characters to generate. Zero (or a negative
            value) yields an empty string.

    Returns:
        A string of ``length`` characters, each drawn independently from
        ``string.ascii_lowercase``.
    """
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for _ in range(length))
def getlinks(url):
    """Collect the link targets of every ``<a>`` element on a page.

    A new Chrome instance (configured by the module-level ``chromeOptions``)
    is started for the call and is always shut down again, even when loading
    or querying the page raises.

    Args:
        url: Address of the page to load.

    Returns:
        A list with the ``href`` attribute of each anchor element, in the
        order Selenium returned the elements. Anchors whose ``href`` is
        missing or empty are skipped, so every entry can be passed straight
        to ``driver.get``.
    """
    # "the path" is a placeholder: replace it with the path to chromedriver.
    driver = webdriver.Chrome(executable_path="the path",
                              options=chromeOptions)
    try:
        driver.get(url)
        # The generic find_elements(by, value) form is used because the
        # find_elements_by_* helpers were removed in Selenium 4.3.
        anchors = driver.find_elements("xpath", ".//a")
        hrefs = (anchor.get_attribute("href") for anchor in anchors)
        return [href for href in hrefs if href]
    finally:
        # Always release the browser process, including on errors.
        driver.quit()
def scrape(url):
    """Queue two background page loads on the shared thread pool.

    The first job loads ``url`` itself; the second loads a random,
    ten-letter path under the target site.

    Args:
        url: Address of the page to load.

    Returns:
        None. The futures returned by ``executor.submit`` are discarded, so
        exceptions raised inside the jobs are not reported to the caller.
    """
    executor.submit(scraper, url)
    # Generate junk links as well, if needed. If the site under test has a
    # search function or any other pages that trigger heavy database
    # queries, this variant is the one to use.
    # "a link/" is a placeholder: replace it with the base URL of the target
    # site. It has to be a quoted string to be valid Python.
    executor.submit(scraper, "a link/" + generate_random_string(10))
def scraper(url):
    """Load ``url`` in a fresh Chrome instance and keep it open for 15 seconds.

    Args:
        url: Address of the page to load.

    Returns:
        None. The browser is always closed before returning, including when
        the page load raises.
    """
    # "the path" is a placeholder: replace it with the path to chromedriver.
    driver = webdriver.Chrome(executable_path="the path",
                              options=chromeOptions)
    try:
        driver.get(url)
        # Hold the browser session open for 15 seconds after the page loads.
        time.sleep(15)
    finally:
        # Always release the browser process, including on errors.
        driver.quit()
# Collect the links from the start page ("a link" is a placeholder for the
# target URL), then queue every link for scraping.
urls = getlinks("a link")
# Repeating the list 10 times sets the number of instances per link.
for url in urls * 10:
    scrape(url)
Answered By - Brakke Baviaan
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.