Issue
I am trying to add a GUI to my web scraper script but I'm running into some problems. Whenever I submit something into the text box and press the submit button, it displays the glutenous of it but then it immediately opens a new. An identical one to the one before inputting anything. What I want it to do is to not open a new window so that I can press the "Again" putting to clear the input and output but when it opens a new window any input to the old window is ignored.
import scrapy
from scrapy.crawler import CrawlerProcess
import colorama
from colorama import Fore, Back, Style
import re
from scrapy import exceptions
import PySimpleGUI as sg
import sys
GlutenFreeKeyWords = [
"vete", "gluten", "råg", "korn", "kamut", "dinkel", "vetekli", "kruskakli", "spelt", "durum", "havregryn",
"mannagryn"
]
re_glutenFreeKeyWords = re.compile("|".join(GlutenFreeKeyWords))
GlutenFree = False
def convertTuple(tup):
# initialize an empty string
str = ''
for item in tup:
str = str + item + ", "
return str
class Spider(scrapy.Spider):
ingredients = ""
name = "ICAScraper"
i = 0
productName = ""
def start_requests(self):
urls = [InputURL]
for url in urls:
yield scrapy.Request(url=url, callback=self.on_response)
def on_response(self, response):
self.CheckForIngredients(response, headerNumber=self.SearchHeaders(response))
def SearchHeaders(self, response):
global productName
productName = (response.xpath(
"/html/body/div[1]/div/div[1]/div[2]/main/div/div[1]/div/div[2]/div/div[1]/h1/text()").get())
i = 0
while i < 10:
headerTitle = (response.xpath(
"/html/body/div[1]/div/div[1]/div[2]/main/div/div[2]/div/div/div/div[" + str(i) + "]/h2").get())
if headerTitle is None:
i += 1
continue
elif "Ingredienser" in headerTitle:
print("Found \"Ingredienser\" in header: " + str(i))
headerNumber = i
return headerNumber
break
else:
i += 1
print(Style.BRIGHT + Fore.RED + "NO INGREDIENTS FOUND")
raise scrapy.exceptions.CloseSpider("No ingredients found")
def CheckForIngredients(self, response, headerNumber):
if response.xpath(
"/html/body/div[1]/div/div[1]/div[2]/main/div/div[1]/div/div[2]/div[2]/div/div/div/div[1]/div/div/span/text()").get() == "Glutenfritt":
print(Fore.GREEN + "Gluten Free")
print(Style.RESET_ALL)
GlutenFree = True
ingredients = (response.xpath("/html/body/div[1]/div/div[1]/div[2]/main/div/div[2]/div/div/div/div[" + str(
headerNumber) + "]/div/text()").get())
ingredients = str(ingredients).lower()
if re_glutenFreeKeyWords.search(ingredients):
GlutenFree = False
self.PrintResult(ingredients, GlutenFree)
else:
GlutenFree = True
self.PrintResult(ingredients, GlutenFree)
def PrintResult(self, ingredients, GlutenFree):
print(Style.BRIGHT + Fore.BLUE + "Product: " + Fore.YELLOW + productName + Style.RESET_ALL)
if GlutenFree:
print(Fore.BLUE + "Result: " + Fore.GREEN + "Gluten Free")
print(Style.RESET_ALL)
print("Just to make sure, here are the ingredients: " + ingredients)
window['-OUTPUT-'].update('Gluten Free')
window["Again"].update(visible=True, disabled=False)
else:
print(Fore.BLUE + "Result: " + Fore.RED + "Not Gluten Free")
print(Style.RESET_ALL)
print("Here are the ingredients: " + ingredients)
print(
"Here are the marked, potentially gluten containing ingredients: " +
Fore.RED + convertTuple(re_glutenFreeKeyWords.findall(ingredients)) +
Style.RESET_ALL)
window['-OUTPUT-'].update('Not Gluten Free')
window["Again"].update(visible=True, disabled=False)
c = CrawlerProcess({
'USER_AGENT': 'Mozilla/5.0',
'LOG_LEVEL': 'WARNING',
'REQUEST_FINGERPRINTER_IMPLEMENTATION': '2.7',
})
while True:
layout = [[sg.Text("Input Link")],
[sg.Input(key='-INPUT-', do_not_clear=False)],
[sg.Text(size=(40, 1), key='-OUTPUT-')],
[sg.Button('Submit'), sg.Button('Quit')],
[sg.Button('Again', disabled=True, visible=False)]]
window = sg.Window('Gluten Free Checker', layout)
event, values = window.read()
global InputURL
InputURL = values["-INPUT-"]
# End program if user closes window or
# presses the OK button
if event == sg.WIN_CLOSED or event == 'Quit':
sys.exit()
elif event == 'Submit':
print(InputURL)
c.crawl(Spider)
c.start()
elif event == "Again":
print("Again")
window.close()
I don't really know what to try because anything I google just tells me how to work with multiple windows, which isn't what I want. I don't know how to prevent the new window from opening and how to keep the old windows input.
Solution
You are creating the layout and window inside of your while True
loop. So every time it iterates the loop it creates a brand new carbon copy of the layout and all of the widgets.
If you only want it to produce one window and simply cycle through the input process then all you need to do is pull the window creation code out of the while loop.
For example:
layout = [[sg.Text("Input Link")],
[sg.Input(key='-INPUT-', do_not_clear=False)],
[sg.Text(size=(40, 1), key='-OUTPUT-')],
[sg.Button('Submit'), sg.Button('Quit')],
[sg.Button('Again', disabled=True, visible=False)]]
window = sg.Window('Gluten Free Checker', layout)
while True:
event, values = window.read()
global InputURL
InputURL = values["-INPUT-"]
# End program if user closes window or
# presses the OK button
if event == sg.WIN_CLOSED or event == 'Quit':
sys.exit()
elif event == 'Submit':
print(InputURL)
c.crawl(Spider)
c.start()
elif event == "Again":
print("Again")
window.close()
Unfortunately this will only solve the issue of creating multiple windows. This fix does not address the fact that twisted
reactors cannot be restarted, which is the error you would receive upon submitting a second url
input.
Answered By - Alexander
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.