Issue
How do I use Selenium with Google Chrome to scrape websites?
And what about virtualenv? Is it required? Why should I use it, or why should I not use virtualenv?
#Install Google Chrome
# Download the Chrome .deb (-c resumes a partial download), install it with
# dpkg, then let apt-get fix up any dependencies dpkg left unresolved.
wget -c https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
dpkg -i google-chrome-stable_current_amd64.deb
apt-get -f install
#Install Selenium
# Install python-dev and python-pip via apt, then use pip to install selenium.
apt-get install python-dev python-pip
pip install selenium
#selenium_scrape.py
A simple script to check whether it's working:
import time
from selenium import webdriver
driver = webdriver.Chrome()  # create a Chrome WebDriver with default arguments
time.sleep(5)  # pause for 5 seconds before shutting down
driver.quit()  # end the WebDriver session
#Command
python selenium_scrape.py
#Error
Traceback (most recent call last):
File "selenium_scrape.py", line 4, in <module>
driver = webdriver.Chrome('/lib/modules/3.16.0-4-amd64/kernel/drivers/platform/chrome')
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/chrome/webdriver.py", line 61, in __init__
self.service.start()
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/common/service.py", line 74, in start
os.path.basename(self.path), self.start_error_message)
selenium.common.exceptions.WebDriverException: Message: 'chrome' executable may have wrong permissions. Please see https://sites.google.com/a/chromium.org/chromedriver/home
Exception AttributeError: "'Service' object has no attribute 'process'" in <bound method Service.__del__ of <selenium.webdriver.chrome.service.Service object at 0x7f88e9347190>> ignored
#The full script
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
def init_driver():
    """Create a Chrome WebDriver and attach a 5-second explicit wait to it.

    Returns the driver; the ``WebDriverWait`` is stored on it as
    ``driver.wait`` so callers can use ``driver.wait.until(...)``.
    """
    driver = webdriver.Chrome()
    driver.wait = WebDriverWait(driver, 5)
    return driver
def lookup(driver, query):
    """Open google.com, type *query* into the search box and click search.

    ``driver`` must carry a ``wait`` attribute usable as
    ``driver.wait.until(...)``. If a wait raises ``TimeoutException``,
    a message is printed and the exception is not propagated.
    """
    driver.get("http://www.google.com")
    try:
        # Wait until the search box (name="q") is present.
        box = driver.wait.until(EC.presence_of_element_located(
            (By.NAME, "q")))
        # Wait until the search button (name="btnK") is clickable.
        button = driver.wait.until(EC.element_to_be_clickable(
            (By.NAME, "btnK")))
        box.send_keys(query)
        button.click()
    except TimeoutException:
        print("Box or Button not found in google.com")
if __name__ == "__main__":
    # Script entry point: search Google for "Selenium", pause, then close.
    driver = init_driver()
    try:
        lookup(driver, "Selenium")
        time.sleep(5)
    finally:
        # Always close the browser, even if the lookup raised.
        driver.quit()
Solution
The question is (at the moment) about an indentation error. This can be easily fixed:
def lookup(driver, query):
    """Open google.com, type *query* into the search box and click search.

    ``driver`` must carry a ``wait`` attribute usable as
    ``driver.wait.until(...)``. If a wait raises ``TimeoutException``,
    a message is printed and the exception is not propagated.
    """
    driver.get("http://www.google.com")
    try:
        # Wait until the search box (name="q") is present.
        box = driver.wait.until(EC.presence_of_element_located(
            (By.NAME, "q")))
        # Wait until the search button (name="btnK") is clickable.
        button = driver.wait.until(EC.element_to_be_clickable(
            (By.NAME, "btnK")))
        box.send_keys(query)
        button.click()
    except TimeoutException:
        print("Box or Button not found in google.com")
Answered By - flaschbier
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.